From bc37d4b3d9ac9e647c7eda5aee9fda07ce840b3f Mon Sep 17 00:00:00 2001 From: wrv Date: Mon, 19 Aug 2024 21:56:30 -0500 Subject: [PATCH] x86 wasm: Wasm SIMD version of `_mm_sad_epu8` --- simde/x86/sse2.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/simde/x86/sse2.h b/simde/x86/sse2.h index a97b30904..1d73c89a0 100644 --- a/simde/x86/sse2.h +++ b/simde/x86/sse2.h @@ -4812,6 +4812,14 @@ simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { r_.neon_u64 = vcombine_u64( vpaddl_u32(vpaddl_u16(vget_low_u16(t))), vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_or(wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128), + wasm_u8x16_sub_sat(b_.wasm_v128, a_.wasm_v128)); + tmp = wasm_i16x8_add(wasm_u16x8_shr(tmp, 8), + wasm_v128_and(tmp, wasm_i16x8_splat(0x00FF))); + tmp = wasm_i16x8_add(tmp, wasm_i32x4_shl(tmp, 16)); + tmp = wasm_i16x8_add(tmp, wasm_i64x2_shl(tmp, 32)); + r_.wasm_v128 = wasm_u64x2_shr(tmp, 48); #else for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { uint16_t tmp = 0;