Skip to content

Commit

Permalink
Make sure there are no memcpy calls (#108)
Browse files Browse the repository at this point in the history
Apparently even with the hand-rolled `memcpy_unaligned_nonoverlapping_inline_opt_lt_64` llvm still replaced that code for the last < 8 bytes with a call to `memcpy`. This fixes it.

The CI inlining tests now check for all present symbols. The rationale is that referenced symbols must be called from the simdutf8 if present.

As a drive-by change it uses `get_unchecked` to get rid of panicking code in `validate_utf8_at_offset` and `get_compat_error`.
  • Loading branch information
hkratz authored Oct 24, 2024
1 parent 5d531be commit 5fda078
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 17 deletions.
26 changes: 20 additions & 6 deletions inlining/check-inlining.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,30 @@ build_args="${3:-}"
cargo clean --quiet
cargo build --quiet --release --target $target $build_args
LLVM_NM=$(rustc --print sysroot)/lib/rustlib/$(rustc -vV | sed -n 's|host: ||p')/bin/llvm-nm
nm_output=$($LLVM_NM --defined-only ../target/$target/release/libsimdutf8.rlib)
nm_output=$($LLVM_NM ../target/$target/release/libsimdutf8.rlib)
if [[ $target == *darwin* ]]; then
pattern=" (t|T) _"
pattern=" (t|T|U) _"
cut_arg=21
elif [[ $target == *armv7* ]]; then
pattern=" (t|T) "
pattern=" (t|T|U) "
cut_arg=12
else
pattern=" (t|T) "
pattern=" (t|T|U) "
cut_arg=20
fi
inline_ignore_pattern='drop_in_place|::fmt::|^\$x\.|^<T as core::convert::From<T>>::from$|^core::result::Result<T,E>::map_err$'
echo "$nm_output" | rustfilt | egrep "$pattern" | cut -c "$cut_arg"- | grep -Ev "$inline_ignore_pattern" | sort | diff -u $expected_fns -
inline_ignore_pattern=\
'__aeabi_unwind_cpp_pr(0|1)|'\
'drop_in_place|'\
'core::str::converts::from_utf8|'\
'std_detect::detect::|'\
'::fmt::|'\
'^\$x\.|'\
'^<T as core::convert::From<T>>::from$|'\
'^core::str::Utf8Error::error_len$|'\
'^core::str::Utf8Error::valid_up_to$|'\
'^core::str::from_utf8$|'\
'^core::result::Result<T,E>::map_err$'
if [[ $target == *wasm* ]]; then
inline_ignore_pattern="$inline_ignore_pattern|ct_function_table|pointer|r::converts::from_utf8|t::Formatter::write_str|t::write"
fi
echo "$nm_output" | rustfilt | egrep "$pattern" | cut -c "$cut_arg"- | grep -Ev "$inline_ignore_pattern" | sort -u | diff -u $expected_fns -
54 changes: 44 additions & 10 deletions src/implementation/helpers.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
use core::hint::unreachable_unchecked;

type Utf8ErrorCompat = crate::compat::Utf8Error;

/// Uses core::str::from_utf8 to validate that the subslice
/// starting at `offset` is valid UTF-8.
///
/// # Safety
/// Caller has to ensure that `offset` is in bounds.
///
#[inline]
#[flexpect::e(clippy::cast_possible_truncation)]
pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), Utf8ErrorCompat> {
match core::str::from_utf8(&input[offset..]) {
pub(crate) unsafe fn validate_utf8_at_offset(
input: &[u8],
offset: usize,
) -> Result<(), Utf8ErrorCompat> {
let input = input.get_unchecked(offset..);
match core::str::from_utf8(input) {
Ok(_) => Ok(()),
Err(err) => Err(Utf8ErrorCompat {
valid_up_to: err.valid_up_to() + offset,
Expand All @@ -15,8 +27,17 @@ pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(),
}
}

/// Necessary tor 1.38 compatibility
#[inline]
unsafe fn unwrap_err_unchecked<O, E>(r: Result<O, E>) -> E {
match r {
// SAFETY: the safety contract must be upheld by the caller.
Ok(_) => unreachable_unchecked(),
Err(e) => e,
}
}

#[cold]
#[flexpect::e(clippy::unwrap_used)]
#[allow(dead_code)] // only used if there is a SIMD implementation
pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8ErrorCompat {
let offset = if failing_block_pos == 0 {
Expand All @@ -29,12 +50,14 @@ pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8Er
// three bytes are all continuation bytes then the previous block ends with a four byte
// UTF-8 codepoint, is thus complete and valid UTF-8. We start the check with the
// current block in that case.
//
// SAFETY: safe because failing_block_pos is in bounds.
(1..=3)
.find(|i| input[failing_block_pos - i] >> 6 != 0b10)
.find(|i| *(unsafe { input.get_unchecked(failing_block_pos - i) }) >> 6 != 0b10)
.map_or(failing_block_pos, |i| failing_block_pos - i)
};
// UNWRAP: safe because the SIMD UTF-8 validation found an error
validate_utf8_at_offset(input, offset).unwrap_err()
// SAFETY: safe because the SIMD UTF-8 validation found an error and offset is in bounds.
unsafe { unwrap_err_unchecked(validate_utf8_at_offset(input, offset)) }
}

#[allow(dead_code)] // only used if there is a SIMD implementation
Expand Down Expand Up @@ -70,11 +93,22 @@ pub(crate) unsafe fn memcpy_unaligned_nonoverlapping_inline_opt_lt_64(
memcpy_u64(&mut src, &mut dest);
len -= 8;
}
while len > 0 {
if len >= 4 {
dest.cast::<u32>()
.write_unaligned(src.cast::<u32>().read_unaligned());
src = src.offset(4);
dest = dest.offset(4);
len -= 4;
}
if len >= 2 {
dest.cast::<u16>()
.write_unaligned(src.cast::<u16>().read_unaligned());
src = src.offset(2);
dest = dest.offset(2);
len -= 2;
}
if len == 1 {
*dest = *src;
src = src.offset(1);
dest = dest.offset(1);
len -= 1;
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/implementation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,5 +108,6 @@ pub(crate) fn validate_utf8_basic_fallback(input: &[u8]) -> Result<(), crate::ba

#[inline]
pub(crate) fn validate_utf8_compat_fallback(input: &[u8]) -> Result<(), crate::compat::Utf8Error> {
helpers::validate_utf8_at_offset(input, 0)
// SAFETY: 0 is always in bounds
unsafe { helpers::validate_utf8_at_offset(input, 0) }
}

0 comments on commit 5fda078

Please sign in to comment.