From 4dc88858fee66aedae0d97396a5499677de9aff0 Mon Sep 17 00:00:00 2001 From: Marco Castelluccio Date: Tue, 17 Oct 2023 17:31:12 +0000 Subject: [PATCH] Bug 1859085 - Update xsimd to 11.1.0. r=padenot,sergesanspaille Changlog: 11.1.0 ------ * Introduce XSIMD_DEFAULT_ARCH to force default architecture (if any) * Remove C++ requirement on xsimd::exp10 scalar implementation 11.0.0 ------ * Provide a generic reducer * Fix ``find_package(xsimd)`` for xtl enabled xsimd, reloaded * Provide avx512f implementation of FMA and variant * Hexadecimal floating points are not a C++11 feature * back to slow implementation of exp10 on Windows * Changed bitwise_cast API * Provide generic signed /unsigned type conversion * Fixed sde location * Feature/incr decr Depends on D191042 Differential Revision: https://phabricator.services.mozilla.com/D191043 UltraBlame original commit: ca60a5861d4a8bd5de1c8e5a9f76c9e35767c38c --- third_party/xsimd/Changelog.rst | 206 ++++++ .../arch/generic/xsimd_generic_arithmetic.hpp | 174 +++++ .../arch/generic/xsimd_generic_details.hpp | 364 +++++++++++ .../xsimd/include/xsimd/arch/xsimd_avx.hpp | 152 +++++ .../xsimd/include/xsimd/arch/xsimd_neon.hpp | 1 + .../xsimd/include/xsimd/arch/xsimd_scalar.hpp | 188 +++++- .../xsimd/include/xsimd/arch/xsimd_sse2.hpp | 152 +++++ .../xsimd/include/xsimd/config/xsimd_arch.hpp | 364 +++++------ .../include/xsimd/config/xsimd_config.hpp | 4 +- .../include/xsimd/config/xsimd_cpuid.hpp | 180 +++-- .../xsimd/memory/xsimd_aligned_allocator.hpp | 7 - .../include/xsimd/memory/xsimd_alignment.hpp | 3 + .../xsimd/include/xsimd/types/xsimd_api.hpp | 613 ++++++++++++------ .../xsimd/types/xsimd_generic_arch.hpp | 16 + third_party/xsimd/moz.yaml | 24 +- 15 files changed, 2001 insertions(+), 447 deletions(-) diff --git a/third_party/xsimd/Changelog.rst b/third_party/xsimd/Changelog.rst index b0c7d55067b4b..9787247fe7ce9 100644 --- a/third_party/xsimd/Changelog.rst +++ b/third_party/xsimd/Changelog.rst @@ -49,6 +49,212 @@ Changelog = = = +11 +. +1 +. +0 +- +- +- +- +- +- +* +Introduce +XSIMD_DEFAULT_ARCH +to +force +default +architecture +( +if +any +) +* +Remove +C ++ ++ +requirement +on +xsimd +: +: +exp10 +scalar +implementation +* +Improve +and +test +documentation +11 +. +0 +. +0 +- +- +- +- +- +- +* +Provide +a +generic +reducer +* +Fix +find_package +( +xsimd +) +for +xtl +enabled +xsimd +reloaded +* +Cleanup +benchmark +code +* +Provide +avx512f +implementation +of +FMA +and +variant +* +Hexadecimal +floating +points +are +not +a +C ++ ++ +11 +feature +* +back +to +slow +implementation +of +exp10 +on +Windows +* +Changed +bitwise_cast +API +* +Provide +generic +signed +/ +unsigned +type +conversion +* +Fixed +sde +location +* +Feature +/ +incr +decr +* +Cleanup +documentation +10 +. +0 +. +0 +- +- +- +- +- +- +* +Fix +potential +ABI +issue +in +SVE +support +* +Disable +fast +exp10 +on +OSX +* +Assert +on +unaligned +memory +when +calling +aligned +load +/ +store +* +Fix +warning +about +uninitialized +storage +* +Always +forward +arch +parameter +* +Do +not +specialize +the +behavior +of +simd_return_type +for +char +* +Support +broadcasting +of +complex +batches +* +Make +xsimd +compatible +with +- +fno +- +exceptions +* +Provide +and +test +comparison +operators +overloads +that +accept +scalars 9 . 0 diff --git a/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp b/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp index 4b5438cc86243..7617f0ec6c109 100644 --- a/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp +++ b/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp @@ -173,6 +173,93 @@ class A class T +> +inline +batch +< +T +A +> +decr +( +batch +< +T +A +> +const +& +self +requires_arch +< +generic +> +) +noexcept +{ +return +self +- +T +( +1 +) +; +} +template +< +class +A +class +T +class +Mask +> +inline +batch +< +T +A +> +decr_if +( +batch +< +T +A +> +const +& +self +Mask +const +& +mask +requires_arch +< +generic +> +) +noexcept +{ +return +select +( +mask +decr +( +self +) +self +) +; +} +template +< +class +A +class +T class = typename @@ -1074,6 +1161,93 @@ class A class T +> +inline +batch +< +T +A +> +incr +( +batch +< +T +A +> +const +& +self +requires_arch +< +generic +> +) +noexcept +{ +return +self ++ +T +( +1 +) +; +} +template +< +class +A +class +T +class +Mask +> +inline +batch +< +T +A +> +incr_if +( +batch +< +T +A +> +const +& +self +Mask +const +& +mask +requires_arch +< +generic +> +) +noexcept +{ +return +select +( +mask +incr +( +self +) +self +) +; +} +template +< +class +A +class +T class > inline diff --git a/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_details.hpp b/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_details.hpp index 2b0440fcbd7c4..ec86c967acb3e 100644 --- a/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_details.hpp +++ b/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_details.hpp @@ -1530,6 +1530,370 @@ other_buffer namespace detail { +template +< +class +A +> +inline +batch +< +uint8_t +A +> +fast_cast +( +batch +< +int8_t +A +> +const +& +self +batch +< +uint8_t +A +> +const +& +requires_arch +< +generic +> +) +noexcept +{ +return +bitwise_cast +< +uint8_t +> +( +self +) +; +} +template +< +class +A +> +inline +batch +< +uint16_t +A +> +fast_cast +( +batch +< +int16_t +A +> +const +& +self +batch +< +uint16_t +A +> +const +& +requires_arch +< +generic +> +) +noexcept +{ +return +bitwise_cast +< +uint16_t +> +( +self +) +; +} +template +< +class +A +> +inline +batch +< +uint32_t +A +> +fast_cast +( +batch +< +int32_t +A +> +const +& +self +batch +< +uint32_t +A +> +const +& +requires_arch +< +generic +> +) +noexcept +{ +return +bitwise_cast +< +uint32_t +> +( +self +) +; +} +template +< +class +A +> +inline +batch +< +uint64_t +A +> +fast_cast +( +batch +< +int64_t +A +> +const +& +self +batch +< +uint64_t +A +> +const +& +requires_arch +< +generic +> +) +noexcept +{ +return +bitwise_cast +< +uint64_t +> +( +self +) +; +} +template +< +class +A +> +inline +batch +< +int8_t +A +> +fast_cast +( +batch +< +uint8_t +A +> +const +& +self +batch +< +int8_t +A +> +const +& +requires_arch +< +generic +> +) +noexcept +{ +return +bitwise_cast +< +int8_t +> +( +self +) +; +} +template +< +class +A +> +inline +batch +< +int16_t +A +> +fast_cast +( +batch +< +uint16_t +A +> +const +& +self +batch +< +int16_t +A +> +const +& +requires_arch +< +generic +> +) +noexcept +{ +return +bitwise_cast +< +int16_t +> +( +self +) +; +} +template +< +class +A +> +inline +batch +< +int32_t +A +> +fast_cast +( +batch +< +uint32_t +A +> +const +& +self +batch +< +int32_t +A +> +const +& +requires_arch +< +generic +> +) +noexcept +{ +return +bitwise_cast +< +int32_t +> +( +self +) +; +} +template +< +class +A +> +inline +batch +< +int64_t +A +> +fast_cast +( +batch +< +uint64_t +A +> +const +& +self +batch +< +int64_t +A +> +const +& +requires_arch +< +generic +> +) +noexcept +{ +return +bitwise_cast +< +int64_t +> +( +self +) +; +} +} +namespace +detail +{ struct with_fast_conversion { diff --git a/third_party/xsimd/include/xsimd/arch/xsimd_avx.hpp b/third_party/xsimd/include/xsimd/arch/xsimd_avx.hpp index a29555e0622c5..95cc5d7b3336d 100644 --- a/third_party/xsimd/include/xsimd/arch/xsimd_avx.hpp +++ b/third_party/xsimd/include/xsimd/arch/xsimd_avx.hpp @@ -4518,6 +4518,82 @@ template < class A +class +T +class += +typename +std +: +: +enable_if +< +std +: +: +is_integral +< +T +> +: +: +value +void +> +: +: +type +> +inline +batch +< +T +A +> +decr_if +( +batch +< +T +A +> +const +& +self +batch_bool +< +T +A +> +const +& +mask +requires_arch +< +avx +> +) +noexcept +{ +return +self ++ +batch +< +T +A +> +( +mask +. +data +) +; +} +template +< +class +A > inline batch @@ -5923,6 +5999,82 @@ class A class T +class += +typename +std +: +: +enable_if +< +std +: +: +is_integral +< +T +> +: +: +value +void +> +: +: +type +> +inline +batch +< +T +A +> +incr_if +( +batch +< +T +A +> +const +& +self +batch_bool +< +T +A +> +const +& +mask +requires_arch +< +avx +> +) +noexcept +{ +return +self +- +batch +< +T +A +> +( +mask +. +data +) +; +} +template +< +class +A +class +T size_t I class diff --git a/third_party/xsimd/include/xsimd/arch/xsimd_neon.hpp b/third_party/xsimd/include/xsimd/arch/xsimd_neon.hpp index 6b15253ee014a..97553c7de8c05 100644 --- a/third_party/xsimd/include/xsimd/arch/xsimd_neon.hpp +++ b/third_party/xsimd/include/xsimd/arch/xsimd_neon.hpp @@ -2071,6 +2071,7 @@ complex < float > +A > ( float32x4_t diff --git a/third_party/xsimd/include/xsimd/arch/xsimd_scalar.hpp b/third_party/xsimd/include/xsimd/arch/xsimd_scalar.hpp index abc09a5f00673..17a2d61c1029f 100644 --- a/third_party/xsimd/include/xsimd/arch/xsimd_scalar.hpp +++ b/third_party/xsimd/include/xsimd/arch/xsimd_scalar.hpp @@ -701,6 +701,99 @@ class T > inline +T +incr +( +T +const +& +x +) +noexcept +{ +return +x ++ +T +( +1 +) +; +} +template +< +class +T +> +inline +T +incr_if +( +T +const +& +x +bool +mask +) +noexcept +{ +return +x ++ +T +( +mask +? +1 +: +0 +) +; +} +inline +bool +all +( +bool +mask +) +{ +return +mask +; +} +inline +bool +any +( +bool +mask +) +{ +return +mask +; +} +inline +bool +none +( +bool +mask +) +{ +return +! +mask +; +} +template +< +class +T +> +inline typename std : @@ -3113,17 +3206,27 @@ x ) noexcept { +const +float +ln10 += +std +: +: +log +( +10 +. +f +) +; return std : : exp ( -0x1 -. -26bb1cp -+ -1f +ln10 * x ) @@ -3140,17 +3243,26 @@ x ) noexcept { +const +double +ln10 += +std +: +: +log +( +10 +. +) +; return std : : exp ( -0x1 -. -26bb1bbb55516p -+ -1 +ln10 * x ) @@ -4913,6 +5025,62 @@ x y ; } +template +< +class +T +> +inline +T +decr +( +T +const +& +x +) +noexcept +{ +return +x +- +T +( +1 +) +; +} +template +< +class +T +> +inline +T +decr_if +( +T +const +& +x +bool +mask +) +noexcept +{ +return +x +- +T +( +mask +? +1 +: +0 +) +; +} # ifdef XSIMD_ENABLE_XTL_COMPLEX diff --git a/third_party/xsimd/include/xsimd/arch/xsimd_sse2.hpp b/third_party/xsimd/include/xsimd/arch/xsimd_sse2.hpp index b18c75f11c545..50749abe28f98 100644 --- a/third_party/xsimd/include/xsimd/arch/xsimd_sse2.hpp +++ b/third_party/xsimd/include/xsimd/arch/xsimd_sse2.hpp @@ -3784,6 +3784,82 @@ template < class A +class +T +class += +typename +std +: +: +enable_if +< +std +: +: +is_integral +< +T +> +: +: +value +void +> +: +: +type +> +inline +batch +< +T +A +> +decr_if +( +batch +< +T +A +> +const +& +self +batch_bool +< +T +A +> +const +& +mask +requires_arch +< +sse2 +> +) +noexcept +{ +return +self ++ +batch +< +T +A +> +( +mask +. +data +) +; +} +template +< +class +A > inline batch @@ -6016,6 +6092,82 @@ class A class T +class += +typename +std +: +: +enable_if +< +std +: +: +is_integral +< +T +> +: +: +value +void +> +: +: +type +> +inline +batch +< +T +A +> +incr_if +( +batch +< +T +A +> +const +& +self +batch_bool +< +T +A +> +const +& +mask +requires_arch +< +sse2 +> +) +noexcept +{ +return +self +- +batch +< +T +A +> +( +mask +. +data +) +; +} +template +< +class +A +class +T size_t I class diff --git a/third_party/xsimd/include/xsimd/config/xsimd_arch.hpp b/third_party/xsimd/include/xsimd/config/xsimd_arch.hpp index 08dbefc4b90f0..e6a6fe6e216ad 100644 --- a/third_party/xsimd/include/xsimd/config/xsimd_arch.hpp +++ b/third_party/xsimd/include/xsimd/config/xsimd_arch.hpp @@ -52,6 +52,92 @@ hpp namespace xsimd { +struct +unavailable +{ +static +constexpr +bool +supported +( +) +noexcept +{ +return +false +; +} +static +constexpr +bool +available +( +) +noexcept +{ +return +false +; +} +static +constexpr +unsigned +version +( +) +noexcept +{ +return +0 +; +} +static +constexpr +std +: +: +size_t +alignment +( +) +noexcept +{ +return +0 +; +} +static +constexpr +bool +requires_alignment +( +) +noexcept +{ +return +false +; +} +static +constexpr +char +const +* +name +( +) +noexcept +{ +return +" +< +none +> +" +; +} +} +; namespace detail { @@ -315,6 +401,59 @@ tail ) ; } +template +< +typename +. +. +. +Ts +> +struct +head +; +template +< +typename +T +typename +. +. +. +Ts +> +struct +head +< +T +Ts +. +. +. +> +{ +using +type += +T +; +} +; +template +< +> +struct +head +< +> +{ +using +type += +unavailable +; +} +; } template < @@ -358,6 +497,24 @@ version ; # endif +using +best += +typename +detail +: +: +head +< +Archs +. +. +. +> +: +: +type +; template < class @@ -508,92 +665,6 @@ size_t } } ; -struct -unavailable -{ -static -constexpr -bool -supported -( -) -noexcept -{ -return -false -; -} -static -constexpr -bool -available -( -) -noexcept -{ -return -false -; -} -static -constexpr -unsigned -version -( -) -noexcept -{ -return -0 -; -} -static -constexpr -std -: -: -size_t -alignment -( -) -noexcept -{ -return -0 -; -} -static -constexpr -bool -requires_alignment -( -) -noexcept -{ -return -false -; -} -static -constexpr -char -const -* -name -( -) -noexcept -{ -return -" -< -none -> -" -; -} -} -; namespace detail { @@ -601,62 +672,6 @@ template < class L -> -struct -best -; -template -< -> -struct -best -< -arch_list -< -> -> -{ -using -type -= -unavailable -; -} -; -template -< -class -Arch -class -. -. -. -Archs -> -struct -best -< -arch_list -< -Arch -Archs -. -. -. -> -> -{ -using -type -= -Arch -; -} -; -template -< -class -L class . . @@ -982,12 +997,6 @@ typename detail : : -best -< -typename -detail -: -: supported < all_x86_architectures @@ -995,10 +1004,9 @@ all_x86_architectures : : type -> : : -type +best ; using arm_arch @@ -1007,12 +1015,6 @@ typename detail : : -best -< -typename -detail -: -: supported < all_arm_architectures @@ -1020,38 +1022,36 @@ all_arm_architectures : : type -> : : -type +best ; using -default_arch +best_arch = typename -std -: -: -conditional -< -std -: -: -is_same -< -x86_arch -unavailable -> -: -: -value -arm_arch -x86_arch -> +supported_architectures : : -type +best +; +# +ifdef +XSIMD_DEFAULT_ARCH +using +default_arch += +XSIMD_DEFAULT_ARCH +; +# +else +using +default_arch += +best_arch ; +# +endif namespace detail { @@ -1067,7 +1067,7 @@ dispatcher { const unsigned -best_arch +best_arch_found ; F functor @@ -1241,7 +1241,7 @@ version ) < = -best_arch +best_arch_found ) return functor @@ -1303,7 +1303,7 @@ f ) noexcept : -best_arch +best_arch_found ( available_architectures ( diff --git a/third_party/xsimd/include/xsimd/config/xsimd_config.hpp b/third_party/xsimd/include/xsimd/config/xsimd_config.hpp index 272165cd61f66..9e612240cb413 100644 --- a/third_party/xsimd/include/xsimd/config/xsimd_config.hpp +++ b/third_party/xsimd/include/xsimd/config/xsimd_config.hpp @@ -7,7 +7,7 @@ XSIMD_CONFIG_HPP # define XSIMD_VERSION_MAJOR -10 +11 # define XSIMD_VERSION_MINOR @@ -15,7 +15,7 @@ XSIMD_VERSION_MINOR # define XSIMD_VERSION_PATCH -0 +1 # ifdef __SSE2__ diff --git a/third_party/xsimd/include/xsimd/config/xsimd_cpuid.hpp b/third_party/xsimd/include/xsimd/config/xsimd_cpuid.hpp index 67c395c431ccf..ee11e1555dec1 100644 --- a/third_party/xsimd/include/xsimd/config/xsimd_cpuid.hpp +++ b/third_party/xsimd/include/xsimd/config/xsimd_cpuid.hpp @@ -186,6 +186,11 @@ neon64 1 ; unsigned +sve +: +1 +; +unsigned best ; supported_arch @@ -304,6 +309,75 @@ neon elif defined ( +__ARM_FEATURE_SVE +) +& +& +defined +( +__ARM_FEATURE_SVE_BITS +) +& +& +__ARM_FEATURE_SVE_BITS +> +0 +# +if +defined +( +__linux__ +) +& +& +( +! +defined +( +__ANDROID_API__ +) +| +| +__ANDROID_API__ +> += +18 +) +sve += +bool +( +getauxval +( +AT_HWCAP +) +& +HWCAP_SVE +) +; +# +else +sve += +0 +; +# +endif +best += +sve +: +: +version +( +) +* +sve +; +# +elif +defined +( __x86_64__ ) | @@ -574,20 +648,20 @@ endif } ; int -regs +regs1 [ 4 ] ; get_cpuid ( -regs +regs1 0x1 ) ; sse2 = -regs +regs1 [ 3 ] @@ -617,7 +691,7 @@ sse2 ; sse3 = -regs +regs1 [ 2 ] @@ -647,7 +721,7 @@ sse3 ; ssse3 = -regs +regs1 [ 2 ] @@ -677,7 +751,7 @@ ssse3 ; sse4_1 = -regs +regs1 [ 2 ] @@ -707,7 +781,7 @@ sse4_1 ; sse4_2 = -regs +regs1 [ 2 ] @@ -737,7 +811,7 @@ sse4_2 ; fma3_sse = -regs +regs1 [ 2 ] @@ -775,21 +849,15 @@ version fma3_sse ) ; -get_cpuid -( -regs -0x80000001 -) -; -fma4 +avx = -regs +regs1 [ 2 ] > > -16 +28 & 1 ; @@ -801,27 +869,22 @@ std max ( best -fma4 +avx : : version ( ) * -fma4 +avx ) ; -avx +fma3_avx = -regs -[ -2 -] -> -> -28 +avx & -1 +& +fma3_sse ; best = @@ -831,22 +894,45 @@ std max ( best +fma3 +< +xsimd +: +: avx +> : : version ( ) * -avx +fma3_avx ) ; -fma3_avx +int +regs8 +[ +4 +] +; +get_cpuid +( +regs8 +0x80000001 +) +; +fma4 = -avx -& +regs8 +[ +2 +] +> +> +16 & -fma3_sse +1 ; best = @@ -856,31 +942,31 @@ std max ( best -fma3 -< -xsimd -: -: -avx -> +fma4 : : version ( ) * -fma3_avx +fma4 ) ; +int +regs7 +[ +4 +] +; get_cpuid ( -regs +regs7 0x7 ) ; avx2 = -regs +regs7 [ 1 ] @@ -941,7 +1027,7 @@ fma3_avx2 ; avx512f = -regs +regs7 [ 1 ] @@ -971,7 +1057,7 @@ avx512f ; avx512cd = -regs +regs7 [ 1 ] @@ -1003,7 +1089,7 @@ avx512f ; avx512dq = -regs +regs7 [ 1 ] @@ -1037,7 +1123,7 @@ avx512f ; avx512bw = -regs +regs7 [ 1 ] diff --git a/third_party/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp b/third_party/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp index d65674e37d6e1..7877515c83983 100644 --- a/third_party/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +++ b/third_party/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp @@ -69,13 +69,6 @@ class T size_t Align -= -default_arch -: -: -alignment -( -) > class aligned_allocator diff --git a/third_party/xsimd/include/xsimd/memory/xsimd_alignment.hpp b/third_party/xsimd/include/xsimd/memory/xsimd_alignment.hpp index c7fe441c4f3d7..b43892b62b1b7 100644 --- a/third_party/xsimd/include/xsimd/memory/xsimd_alignment.hpp +++ b/third_party/xsimd/include/xsimd/memory/xsimd_alignment.hpp @@ -55,6 +55,8 @@ template < class T +size_t +N > struct allocator_alignment @@ -62,6 +64,7 @@ allocator_alignment aligned_allocator < T +N > > { diff --git a/third_party/xsimd/include/xsimd/types/xsimd_api.hpp b/third_party/xsimd/include/xsimd/types/xsimd_api.hpp index d58accca02e98..e7408c67e151b 100644 --- a/third_party/xsimd/include/xsimd/types/xsimd_api.hpp +++ b/third_party/xsimd/include/xsimd/types/xsimd_api.hpp @@ -189,6 +189,7 @@ add batch < T +A > const & @@ -2058,6 +2059,119 @@ class A > inline +batch +< +T +A +> +decr +( +batch +< +T +A +> +const +& +x +) +noexcept +{ +detail +: +: +static_check_supported_config +< +T +A +> +( +) +; +return +kernel +: +: +decr +< +A +> +( +x +A +{ +} +) +; +} +template +< +class +T +class +A +class +Mask +> +inline +batch +< +T +A +> +decr_if +( +batch +< +T +A +> +const +& +x +Mask +const +& +mask +) +noexcept +{ +detail +: +: +static_check_supported_config +< +T +A +> +( +) +; +return +kernel +: +: +decr_if +< +A +> +( +x +mask +A +{ +} +) +; +} +template +< +class +T +class +A +> +inline auto div ( @@ -3359,25 +3473,23 @@ class T class A -class -F > inline +batch +< T -reduce +A +> +haddp ( -F -& -& -f batch < T A > const -& -x +* +row ) noexcept { @@ -3396,39 +3508,15 @@ return kernel : : -detail -: -: -reduce -( -std -: -: -forward -< -F -> -( -f -) -x -std -: -: -integral_constant -< -unsigned -batch +haddp < -T A > -: -: -size -> ( -) +row +A +{ +} ) ; } @@ -3440,8 +3528,12 @@ class A > inline +batch +< T -reduce_add +A +> +hypot ( batch < @@ -3451,6 +3543,14 @@ A const & x +batch +< +T +A +> +const +& +y ) noexcept { @@ -3469,12 +3569,13 @@ return kernel : : -reduce_add +hypot < A > ( x +y A { } @@ -3489,8 +3590,15 @@ class A > inline +real_batch_type_t +< +batch +< T -reduce_max +A +> +> +imag ( batch < @@ -3518,7 +3626,7 @@ return kernel : : -reduce_max +imag < A > @@ -3538,8 +3646,12 @@ class A > inline +batch +< T -reduce_min +A +> +incr ( batch < @@ -3567,7 +3679,7 @@ return kernel : : -reduce_min +incr < A > @@ -3585,6 +3697,8 @@ class T class A +class +Mask > inline batch @@ -3592,7 +3706,7 @@ batch T A > -haddp +incr_if ( batch < @@ -3600,8 +3714,12 @@ T A > const -* -row +& +x +Mask +const +& +mask ) noexcept { @@ -3620,12 +3738,13 @@ return kernel : : -haddp +incr_if < A > ( -row +x +mask A { } @@ -3635,149 +3754,31 @@ A template < class -T -class -A -> -inline -batch -< -T -A +B > -hypot +B +infinity ( -batch -< -T -A -> -const -& -x -batch -< -T -A -> -const -& -y ) -noexcept { -detail -: -: -static_check_supported_config -< -T -A -> -( -) -; -return -kernel -: -: -hypot -< -A -> -( -x -y -A -{ -} -) -; -} -template -< -class -T -class -A -> -inline -real_batch_type_t -< -batch -< -T -A -> -> -imag -( -batch -< -T -A -> -const -& -x -) -noexcept -{ -detail -: -: -static_check_supported_config -< -T -A -> -( -) -; -return -kernel -: -: -imag -< -A -> -( -x -A -{ -} -) -; -} -template -< -class -B -> -B -infinity -( -) -{ -using -T -= -typename -B -: -: -value_type -; -using -A -= -typename -B -: -: -arch_type -; +using +T += +typename +B +: +: +value_type +; +using +A += +typename +B +: +: +arch_type +; detail : : @@ -6432,6 +6433,62 @@ class T class A +> +inline +real_batch_type_t +< +batch +< +T +A +> +> +real +( +batch +< +T +A +> +const +& +z +) +noexcept +{ +detail +: +: +static_check_supported_config +< +T +A +> +( +) +; +return +kernel +: +: +real +< +A +> +( +z +A +{ +} +) +; +} +template +< +class +T +class +A class = typename @@ -6505,17 +6562,89 @@ class T class A +class +F > inline -real_batch_type_t +T +reduce +( +F +& +& +f +batch +< +T +A +> +const +& +x +) +noexcept +{ +detail +: +: +static_check_supported_config +< +T +A +> +( +) +; +return +kernel +: +: +detail +: +: +reduce +( +std +: +: +forward +< +F +> +( +f +) +x +std +: +: +integral_constant < +unsigned batch < T A > +: +: +size > -real +( +) +) +; +} +template +< +class +T +class +A +> +inline +T +reduce_add ( batch < @@ -6524,7 +6653,7 @@ A > const & -z +x ) noexcept { @@ -6543,12 +6672,110 @@ return kernel : : -real +reduce_add < A > ( -z +x +A +{ +} +) +; +} +template +< +class +T +class +A +> +inline +T +reduce_max +( +batch +< +T +A +> +const +& +x +) +noexcept +{ +detail +: +: +static_check_supported_config +< +T +A +> +( +) +; +return +kernel +: +: +reduce_max +< +A +> +( +x +A +{ +} +) +; +} +template +< +class +T +class +A +> +inline +T +reduce_min +( +batch +< +T +A +> +const +& +x +) +noexcept +{ +detail +: +: +static_check_supported_config +< +T +A +> +( +) +; +return +kernel +: +: +reduce_min +< +A +> +( +x A { } diff --git a/third_party/xsimd/include/xsimd/types/xsimd_generic_arch.hpp b/third_party/xsimd/include/xsimd/types/xsimd_generic_arch.hpp index e136316764593..5e4612669ad59 100644 --- a/third_party/xsimd/include/xsimd/types/xsimd_generic_arch.hpp +++ b/third_party/xsimd/include/xsimd/types/xsimd_generic_arch.hpp @@ -93,6 +93,22 @@ version ) ; } +static +constexpr +char +const +* +name +( +) +noexcept +{ +return +" +generic +" +; +} protected : static diff --git a/third_party/xsimd/moz.yaml b/third_party/xsimd/moz.yaml index 958e192e9994b..062a243f7471d 100644 --- a/third_party/xsimd/moz.yaml +++ b/third_party/xsimd/moz.yaml @@ -40,22 +40,34 @@ QuantStack xsimd release : -e8f209c3397c8a866be2312682689a04e4abfd66 +11 +. +1 +. +0 ( 2023 - -02 +05 - -27T06 +13T15 +: +49 : -32 +21 ++ +00 : -46Z +00 ) . revision : -e8f209c3397c8a866be2312682689a04e4abfd66 +11 +. +1 +. +0 license : BSD