From 5fda0784f6908fcc6c21bab795f5a963aaf9968c Mon Sep 17 00:00:00 2001
From: Hans Kratz <hans@appfour.com>
Date: Thu, 24 Oct 2024 08:30:26 +0200
Subject: [PATCH] Make sure there are no memcpy calls (#108)

Apparently even with the hand-rolled `memcpy_unaligned_nonoverlapping_inline_opt_lt_64` llvm still replaced that code for the last < 8 bytes with a call to `memcpy`. This fixes it.

The CI inlining tests now check for all present symbols. The rationale is that referenced symbols must be called from the simdutf8 if present.

As a drive-by change it uses `get_unchecked` to get rid of panicking code in `validate_utf8_at_offset` and `get_compat_error`.
---
 inlining/check-inlining.sh    | 26 +++++++++++++----
 src/implementation/helpers.rs | 54 ++++++++++++++++++++++++++++-------
 src/implementation/mod.rs     |  3 +-
 3 files changed, 66 insertions(+), 17 deletions(-)
diff --git a/inlining/check-inlining.sh b/inlining/check-inlining.sh
index 6caa8854..86720d91 100755
--- a/inlining/check-inlining.sh
+++ b/inlining/check-inlining.sh
@@ -7,16 +7,30 @@ build_args="${3:-}"
 cargo clean --quiet
 cargo build --quiet --release --target $target $build_args
 LLVM_NM=$(rustc --print sysroot)/lib/rustlib/$(rustc -vV | sed -n 's|host: ||p')/bin/llvm-nm
-nm_output=$($LLVM_NM --defined-only ../target/$target/release/libsimdutf8.rlib)
+nm_output=$($LLVM_NM ../target/$target/release/libsimdutf8.rlib)
 if [[ $target == *darwin* ]]; then
-    pattern=" (t|T) _"
+    pattern=" (t|T|U) _"
     cut_arg=21
 elif [[ $target == *armv7* ]]; then
-    pattern=" (t|T) "
+    pattern=" (t|T|U) "
     cut_arg=12
 else
-    pattern=" (t|T) "
+    pattern=" (t|T|U) "
     cut_arg=20
 fi
-inline_ignore_pattern='drop_in_place|::fmt::|^\$x\.|^<T as core::convert::From<T>>::from$|^core::result::Result<T,E>::map_err$'
-echo "$nm_output" | rustfilt | egrep "$pattern" | cut -c "$cut_arg"- | grep -Ev "$inline_ignore_pattern" | sort | diff -u $expected_fns -
+inline_ignore_pattern=\
+'__aeabi_unwind_cpp_pr(0|1)|'\
+'drop_in_place|'\
+'core::str::converts::from_utf8|'\
+'std_detect::detect::|'\
+'::fmt::|'\
+'^\$x\.|'\
+'^<T as core::convert::From<T>>::from$|'\
+'^core::str::Utf8Error::error_len$|'\
+'^core::str::Utf8Error::valid_up_to$|'\
+'^core::str::from_utf8$|'\
+'^core::result::Result<T,E>::map_err$'
+if [[ $target == *wasm* ]]; then
+    inline_ignore_pattern="$inline_ignore_pattern|ct_function_table|pointer|r::converts::from_utf8|t::Formatter::write_str|t::write"
+fi
+echo "$nm_output" | rustfilt | egrep "$pattern" | cut -c "$cut_arg"- | grep -Ev "$inline_ignore_pattern" | sort -u | diff -u $expected_fns -
diff --git a/src/implementation/helpers.rs b/src/implementation/helpers.rs
index 0f4ecc9b..c74b4209 100644
--- a/src/implementation/helpers.rs
+++ b/src/implementation/helpers.rs
@@ -1,9 +1,21 @@
+use core::hint::unreachable_unchecked;
+
 type Utf8ErrorCompat = crate::compat::Utf8Error;
 
+/// Uses core::str::from_utf8 to validate that the subslice
+/// starting at `offset` is valid UTF-8.
+///
+/// # Safety
+/// Caller has to ensure that `offset` is in bounds.
+///
 #[inline]
 #[flexpect::e(clippy::cast_possible_truncation)]
-pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), Utf8ErrorCompat> {
-    match core::str::from_utf8(&input[offset..]) {
+pub(crate) unsafe fn validate_utf8_at_offset(
+    input: &[u8],
+    offset: usize,
+) -> Result<(), Utf8ErrorCompat> {
+    let input = input.get_unchecked(offset..);
+    match core::str::from_utf8(input) {
         Ok(_) => Ok(()),
         Err(err) => Err(Utf8ErrorCompat {
             valid_up_to: err.valid_up_to() + offset,
@@ -15,8 +27,17 @@ pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(),
     }
 }
 
+/// Necessary tor 1.38 compatibility
+#[inline]
+unsafe fn unwrap_err_unchecked<O, E>(r: Result<O, E>) -> E {
+    match r {
+        // SAFETY: the safety contract must be upheld by the caller.
+        Ok(_) => unreachable_unchecked(),
+        Err(e) => e,
+    }
+}
+
 #[cold]
-#[flexpect::e(clippy::unwrap_used)]
 #[allow(dead_code)] // only used if there is a SIMD implementation
 pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8ErrorCompat {
     let offset = if failing_block_pos == 0 {
@@ -29,12 +50,14 @@ pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8Er
         // three bytes are all continuation bytes then the previous block ends with a four byte
         // UTF-8 codepoint, is thus complete and valid UTF-8. We start the check with the
         // current block in that case.
+        //
+        // SAFETY: safe because failing_block_pos is in bounds.
         (1..=3)
-            .find(|i| input[failing_block_pos - i] >> 6 != 0b10)
+            .find(|i| *(unsafe { input.get_unchecked(failing_block_pos - i) }) >> 6 != 0b10)
             .map_or(failing_block_pos, |i| failing_block_pos - i)
     };
-    // UNWRAP: safe because the SIMD UTF-8 validation found an error
-    validate_utf8_at_offset(input, offset).unwrap_err()
+    // SAFETY: safe because the SIMD UTF-8 validation found an error and offset is in bounds.
+    unsafe { unwrap_err_unchecked(validate_utf8_at_offset(input, offset)) }
 }
 
 #[allow(dead_code)] // only used if there is a SIMD implementation
@@ -70,11 +93,22 @@ pub(crate) unsafe fn memcpy_unaligned_nonoverlapping_inline_opt_lt_64(
         memcpy_u64(&mut src, &mut dest);
         len -= 8;
     }
-    while len > 0 {
+    if len >= 4 {
+        dest.cast::<u32>()
+            .write_unaligned(src.cast::<u32>().read_unaligned());
+        src = src.offset(4);
+        dest = dest.offset(4);
+        len -= 4;
+    }
+    if len >= 2 {
+        dest.cast::<u16>()
+            .write_unaligned(src.cast::<u16>().read_unaligned());
+        src = src.offset(2);
+        dest = dest.offset(2);
+        len -= 2;
+    }
+    if len == 1 {
         *dest = *src;
-        src = src.offset(1);
-        dest = dest.offset(1);
-        len -= 1;
     }
 }
 
diff --git a/src/implementation/mod.rs b/src/implementation/mod.rs
index b3f4e460..1a138530 100644
--- a/src/implementation/mod.rs
+++ b/src/implementation/mod.rs
@@ -108,5 +108,6 @@ pub(crate) fn validate_utf8_basic_fallback(input: &[u8]) -> Result<(), crate::ba
 
 #[inline]
 pub(crate) fn validate_utf8_compat_fallback(input: &[u8]) -> Result<(), crate::compat::Utf8Error> {
-    helpers::validate_utf8_at_offset(input, 0)
+    // SAFETY: 0 is always in bounds
+    unsafe { helpers::validate_utf8_at_offset(input, 0) }
 }