From 5fda0784f6908fcc6c21bab795f5a963aaf9968c Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 24 Oct 2024 08:30:26 +0200 Subject: [PATCH] Make sure there are no memcpy calls (#108) Apparently even with the hand-rolled `memcpy_unaligned_nonoverlapping_inline_opt_lt_64` llvm still replaced that code for the last < 8 bytes with a call to `memcpy`. This fixes it. The CI inlining tests now check for all present symbols. The rationale is that referenced symbols must be called from the simdutf8 if present. As a drive-by change it uses `get_unchecked` to get rid of panicking code in `validate_utf8_at_offset` and `get_compat_error`. --- inlining/check-inlining.sh | 26 +++++++++++++---- src/implementation/helpers.rs | 54 ++++++++++++++++++++++++++++------- src/implementation/mod.rs | 3 +- 3 files changed, 66 insertions(+), 17 deletions(-) diff --git a/inlining/check-inlining.sh b/inlining/check-inlining.sh index 6caa8854..86720d91 100755 --- a/inlining/check-inlining.sh +++ b/inlining/check-inlining.sh @@ -7,16 +7,30 @@ build_args="${3:-}" cargo clean --quiet cargo build --quiet --release --target $target $build_args LLVM_NM=$(rustc --print sysroot)/lib/rustlib/$(rustc -vV | sed -n 's|host: ||p')/bin/llvm-nm -nm_output=$($LLVM_NM --defined-only ../target/$target/release/libsimdutf8.rlib) +nm_output=$($LLVM_NM ../target/$target/release/libsimdutf8.rlib) if [[ $target == *darwin* ]]; then - pattern=" (t|T) _" + pattern=" (t|T|U) _" cut_arg=21 elif [[ $target == *armv7* ]]; then - pattern=" (t|T) " + pattern=" (t|T|U) " cut_arg=12 else - pattern=" (t|T) " + pattern=" (t|T|U) " cut_arg=20 fi -inline_ignore_pattern='drop_in_place|::fmt::|^\$x\.|^>::from$|^core::result::Result::map_err$' -echo "$nm_output" | rustfilt | egrep "$pattern" | cut -c "$cut_arg"- | grep -Ev "$inline_ignore_pattern" | sort | diff -u $expected_fns - +inline_ignore_pattern=\ +'__aeabi_unwind_cpp_pr(0|1)|'\ +'drop_in_place|'\ +'core::str::converts::from_utf8|'\ +'std_detect::detect::|'\ +'::fmt::|'\ +'^\$x\.|'\ +'^>::from$|'\ +'^core::str::Utf8Error::error_len$|'\ +'^core::str::Utf8Error::valid_up_to$|'\ +'^core::str::from_utf8$|'\ +'^core::result::Result::map_err$' +if [[ $target == *wasm* ]]; then + inline_ignore_pattern="$inline_ignore_pattern|ct_function_table|pointer|r::converts::from_utf8|t::Formatter::write_str|t::write" +fi +echo "$nm_output" | rustfilt | egrep "$pattern" | cut -c "$cut_arg"- | grep -Ev "$inline_ignore_pattern" | sort -u | diff -u $expected_fns - diff --git a/src/implementation/helpers.rs b/src/implementation/helpers.rs index 0f4ecc9b..c74b4209 100644 --- a/src/implementation/helpers.rs +++ b/src/implementation/helpers.rs @@ -1,9 +1,21 @@ +use core::hint::unreachable_unchecked; + type Utf8ErrorCompat = crate::compat::Utf8Error; +/// Uses core::str::from_utf8 to validate that the subslice +/// starting at `offset` is valid UTF-8. +/// +/// # Safety +/// Caller has to ensure that `offset` is in bounds. +/// #[inline] #[flexpect::e(clippy::cast_possible_truncation)] -pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), Utf8ErrorCompat> { - match core::str::from_utf8(&input[offset..]) { +pub(crate) unsafe fn validate_utf8_at_offset( + input: &[u8], + offset: usize, +) -> Result<(), Utf8ErrorCompat> { + let input = input.get_unchecked(offset..); + match core::str::from_utf8(input) { Ok(_) => Ok(()), Err(err) => Err(Utf8ErrorCompat { valid_up_to: err.valid_up_to() + offset, @@ -15,8 +27,17 @@ pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), } } +/// Necessary tor 1.38 compatibility +#[inline] +unsafe fn unwrap_err_unchecked(r: Result) -> E { + match r { + // SAFETY: the safety contract must be upheld by the caller. + Ok(_) => unreachable_unchecked(), + Err(e) => e, + } +} + #[cold] -#[flexpect::e(clippy::unwrap_used)] #[allow(dead_code)] // only used if there is a SIMD implementation pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8ErrorCompat { let offset = if failing_block_pos == 0 { @@ -29,12 +50,14 @@ pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8Er // three bytes are all continuation bytes then the previous block ends with a four byte // UTF-8 codepoint, is thus complete and valid UTF-8. We start the check with the // current block in that case. + // + // SAFETY: safe because failing_block_pos is in bounds. (1..=3) - .find(|i| input[failing_block_pos - i] >> 6 != 0b10) + .find(|i| *(unsafe { input.get_unchecked(failing_block_pos - i) }) >> 6 != 0b10) .map_or(failing_block_pos, |i| failing_block_pos - i) }; - // UNWRAP: safe because the SIMD UTF-8 validation found an error - validate_utf8_at_offset(input, offset).unwrap_err() + // SAFETY: safe because the SIMD UTF-8 validation found an error and offset is in bounds. + unsafe { unwrap_err_unchecked(validate_utf8_at_offset(input, offset)) } } #[allow(dead_code)] // only used if there is a SIMD implementation @@ -70,11 +93,22 @@ pub(crate) unsafe fn memcpy_unaligned_nonoverlapping_inline_opt_lt_64( memcpy_u64(&mut src, &mut dest); len -= 8; } - while len > 0 { + if len >= 4 { + dest.cast::() + .write_unaligned(src.cast::().read_unaligned()); + src = src.offset(4); + dest = dest.offset(4); + len -= 4; + } + if len >= 2 { + dest.cast::() + .write_unaligned(src.cast::().read_unaligned()); + src = src.offset(2); + dest = dest.offset(2); + len -= 2; + } + if len == 1 { *dest = *src; - src = src.offset(1); - dest = dest.offset(1); - len -= 1; } } diff --git a/src/implementation/mod.rs b/src/implementation/mod.rs index b3f4e460..1a138530 100644 --- a/src/implementation/mod.rs +++ b/src/implementation/mod.rs @@ -108,5 +108,6 @@ pub(crate) fn validate_utf8_basic_fallback(input: &[u8]) -> Result<(), crate::ba #[inline] pub(crate) fn validate_utf8_compat_fallback(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { - helpers::validate_utf8_at_offset(input, 0) + // SAFETY: 0 is always in bounds + unsafe { helpers::validate_utf8_at_offset(input, 0) } }