diff --git a/bench/Cargo.lock b/bench/Cargo.lock index ff131814..765bdc4f 100644 --- a/bench/Cargo.lock +++ b/bench/Cargo.lock @@ -69,9 +69,9 @@ checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "anyhow" -version = "1.0.91" +version = "1.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c042108f3ed77fd83760a5fd79b53be043192bb3b9dba91d8c574c0ada7850c8" +checksum = "37bf3594c4c988a53154954629820791dde498571819ae4ca50ca811e060cc95" [[package]] name = "async-trait" @@ -81,7 +81,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.82", ] [[package]] @@ -639,7 +639,7 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.82", ] [[package]] @@ -650,7 +650,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.85", + "syn 2.0.82", ] [[package]] @@ -727,7 +727,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.82", ] [[package]] @@ -1432,9 +1432,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" dependencies = [ "unicode-ident", ] @@ -1780,9 +1780,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.213" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] @@ -1798,13 +1798,13 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.213" +version = 
"1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.82", ] [[package]] @@ -1873,10 +1873,18 @@ dependencies = [ "criterion", "simdjson-utf8", "simdutf8 0.1.5", + "simdutf8-portable", "wasmer", "wasmtime", ] +[[package]] +name = "simdutf8-portable" +version = "0.1.0" +dependencies = [ + "cfg-if 1.0.0", +] + [[package]] name = "smallvec" version = "1.13.2" @@ -1908,9 +1916,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.85" +version = "2.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" +checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021" dependencies = [ "proc-macro2", "quote", @@ -1962,22 +1970,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.65" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.65" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.82", ] [[package]] @@ -2034,7 +2042,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.82", ] [[package]] @@ -2126,7 +2134,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - 
"syn 2.0.85", + "syn 2.0.82", "wasm-bindgen-shared", ] @@ -2148,7 +2156,7 @@ checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.82", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2851,7 +2859,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.82", ] [[package]] diff --git a/bench/Cargo.toml b/bench/Cargo.toml index 8d09b76c..a6640796 100644 --- a/bench/Cargo.toml +++ b/bench/Cargo.toml @@ -30,6 +30,7 @@ simdutf8_wasmtime = ["wasmtime"] core_affinity = "0.8.1" criterion = "0.5.1" simdutf8 = { version = "*", path = "..", features = ["aarch64_neon"] } +simdutf8-portable = { version = "*", path = "../portable" } simdjson-utf8 = { version = "*", path = "simdjson-utf8", optional = true } # default is cranelift which is not as performant as the llvm backend wasmer = { version = "2.1", optional = true, default-features = false } @@ -47,6 +48,14 @@ harness = false name = "throughput_compat" harness = false +[[bench]] +name = "throughput_basic_portable" +harness = false + +[[bench]] +name = "throughput_compat_portable" +harness = false + [[bench]] name = "throughput_std" harness = false diff --git a/bench/benches/throughput_basic_portable.rs b/bench/benches/throughput_basic_portable.rs new file mode 100644 index 00000000..4da0ed75 --- /dev/null +++ b/bench/benches/throughput_basic_portable.rs @@ -0,0 +1,3 @@ +use simdutf8_bench::define_throughput_benchmark; + +define_throughput_benchmark!(BenchFn::BasicPortable); diff --git a/bench/benches/throughput_compat_portable.rs b/bench/benches/throughput_compat_portable.rs new file mode 100644 index 00000000..bd51376f --- /dev/null +++ b/bench/benches/throughput_compat_portable.rs @@ -0,0 +1,3 @@ +use simdutf8_bench::define_throughput_benchmark; + +define_throughput_benchmark!(BenchFn::CompatPortable); diff --git a/bench/src/lib.rs 
b/bench/src/lib.rs index cf654211..0d3b57ff 100644 --- a/bench/src/lib.rs +++ b/bench/src/lib.rs @@ -1,6 +1,8 @@ use criterion::{measurement::Measurement, BenchmarkGroup, BenchmarkId, Criterion, Throughput}; use simdutf8::basic::from_utf8 as basic_from_utf8; use simdutf8::compat::from_utf8 as compat_from_utf8; +use simdutf8_portable::basic::from_utf8 as basic_from_utf8_portable; +use simdutf8_portable::compat::from_utf8 as compat_from_utf8_portable; use std::str::from_utf8 as std_from_utf8; @@ -29,6 +31,8 @@ pub enum BenchFn { Basic, BasicNoInline, Compat, + BasicPortable, + CompatPortable, Std, #[cfg(feature = "simdjson")] @@ -134,11 +138,12 @@ fn get_valid_slice_of_len_or_more_aligned( fn bench(c: &mut Criterion, name: &str, bytes: &[u8], bench_fn: BenchFn) { let mut group = c.benchmark_group(name); for i in [1, 8, 64, 512, 4096, 65536, 131072].iter() { + let i = i + 33; let alignment = Alignment { boundary: 64, offset: 8, // 8 is the default alignment on 64-bit, so this is what can be expected worst-case }; - let (vec, offset) = get_valid_slice_of_len_or_more_aligned(bytes, *i, alignment); + let (vec, offset) = get_valid_slice_of_len_or_more_aligned(bytes, i, alignment); let slice = &vec[offset..]; assert_eq!( (slice.as_ptr() as usize) % alignment.boundary, @@ -192,6 +197,24 @@ fn bench_input( }, ); } + BenchFn::BasicPortable => { + group.bench_with_input( + BenchmarkId::from_parameter(format!("{:06}", input.len())), + &input, + |b, &slice| { + b.iter(|| assert_eq!(basic_from_utf8_portable(slice).is_ok(), expected_ok)); + }, + ); + } + BenchFn::CompatPortable => { + group.bench_with_input( + BenchmarkId::from_parameter(format!("{:06}", input.len())), + &input, + |b, &slice| { + b.iter(|| assert_eq!(compat_from_utf8_portable(slice).is_ok(), expected_ok)); + }, + ); + } BenchFn::Std => { group.bench_with_input( BenchmarkId::from_parameter(format!("{:06}", input.len())), diff --git a/nightly_workspace/.gitignore b/nightly_workspace/.gitignore new file mode 100644 
index 00000000..1b72444a --- /dev/null +++ b/nightly_workspace/.gitignore @@ -0,0 +1,2 @@ +/Cargo.lock +/target diff --git a/nightly_workspace/Cargo.toml b/nightly_workspace/Cargo.toml new file mode 100644 index 00000000..0beb84a2 --- /dev/null +++ b/nightly_workspace/Cargo.toml @@ -0,0 +1,6 @@ +[workspace] +members = [ + "simdutf8", + "simdutf8/portable", + "simdutf8/bench" +] diff --git a/nightly_workspace/simdutf8 b/nightly_workspace/simdutf8 new file mode 120000 index 00000000..a96aa0ea --- /dev/null +++ b/nightly_workspace/simdutf8 @@ -0,0 +1 @@ +.. \ No newline at end of file diff --git a/portable/.gitignore b/portable/.gitignore new file mode 100644 index 00000000..5038f87b --- /dev/null +++ b/portable/.gitignore @@ -0,0 +1,6 @@ +/target +/.vscode +/.idea +/.zed +/.cargo +/Cargo.lock diff --git a/portable/.prettierrc.toml b/portable/.prettierrc.toml new file mode 100644 index 00000000..3a7aca30 --- /dev/null +++ b/portable/.prettierrc.toml @@ -0,0 +1,2 @@ +proseWrap = "always" +printWidth = 100 diff --git a/portable/Cargo.toml b/portable/Cargo.toml new file mode 100644 index 00000000..601f03aa --- /dev/null +++ b/portable/Cargo.toml @@ -0,0 +1,40 @@ +[package] +name = "simdutf8-portable" +version = "0.0.1" +authors = ["Hans Kratz "] +edition = "2024" +description = "SIMD-accelerated UTF-8 validation using core::simd (experimental)" +documentation = "https://docs.rs/simdutf8-portable/" +homepage = "https://github.com/rusticstuff/simdutf8/tree/main/portable" +repository = "https://github.com/rusticstuff/simdutf8" +readme = "README.md" +keywords = ["utf-8", "unicode", "string", "validation", "simd"] +categories = ["encoding", "algorithms", "no-std"] +license = "MIT OR Apache-2.0" + +[features] +default = ["std"] + +std = [] + +# expose SIMD implementations in basic::imp::* and compat::imp::* +public_imp = [] + +# features to force a certain implementation. Features earlier in the list take +# precedence. 
+ +# force non-SIMD fallback implementation (for testing) +force_fallback = [] +# force 128-bit/256-bit SIMD implementation. +# CAVEAT: slower than even the fallback implementation if not all SIMD functions +# have a fast implementation, in particular `swizzle_dyn` needs to be fast. +force_simd128 = [] +force_simd256 = [] + +[package.metadata.docs.rs] +features = ["public_imp"] +rustdoc-args = ["--cfg", "docsrs"] +default-target = "x86_64-unknown-linux-gnu" + +[dependencies] +cfg-if = "1.0.0" diff --git a/portable/README.md b/portable/README.md new file mode 100644 index 00000000..6b65cb12 --- /dev/null +++ b/portable/README.md @@ -0,0 +1,113 @@ +[![CI](https://github.com/rusticstuff/simdutf8/actions/workflows/portable.yml/badge.svg)](https://github.com/rusticstuff/simdutf8/actions/workflows/portable.yml) +[![crates.io](https://img.shields.io/crates/v/simdutf8-portable.svg)](https://crates.io/crates/simdutf8-portable) +[![docs.rs](https://docs.rs/simdutf8-portable/badge.svg)](https://docs.rs/simdutf8-portable) + +# simdutf8-portable – Fast UTF-8 validation using `core::simd` (portable SIMD) + +Fast API-compatible UTF-8 validation for Rust using the experimental +[`core::simd`](https://doc.rust-lang.org/core/simd/index.html) (portable SIMD) module from the +standard library. An up-to-date nightly Rust compiler is required. The API and the algorithm are the +same as in the [simdutf8](https://crates.io/crates/simdutf8) crate. + +## Features + +- no unsafe code (`#[forbid(unsafe_code)]`) in the implementation +- good performance (with **caveats**, see [architecture notes](#architecture-notes)) +- `auto` module which selects the best implementation for known-good targets at compile-time + including falling back to `core::str::from_utf8`. +- new platforms need no new code as long as they are supported by `core::simd` +- `no_std` support +- supports 128-bit and 256-bit SIMD + +## Limitations + +The functions in `core::simd` are marked as `#[inline]`. 
This way they are compiled to MIR and code generation is deferred until they are actually used in a crate. +- target-feature required +- no runtime implementation selection +- slower + - memcpy calls +- swizzle_dyn + - slow on non-special-cased targets + - requires -Zbuild-std for sse4.2, avx2 support if not part of the target architecture + +## Performance + +- There are no unnecessary bounds checks in the compiled code, functions are properly inlined and + loops properly unrolled (as of nightly-xx). +- uses memcpy because of `forbid(unsafe_code)`, see https://github.com/llvm/llvm-project/issues/87440 +- swizzle_dyn slower because of abstraction + +## Architecture notes + +| Architecture | [Targets](https://doc.rust-lang.org/nightly/rustc/platform-support.html) | Performance\* | Notes | +| --------------- | ------------------------------------------------------------------------ | --------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| aarch64 | all | xx% of simdutf8 | works out of the box | +| x86_64 (avx2) | all | xx% of simdutf8 | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+avx2"` | +| x86_64 (sse4.2) | all | xx% of simdutf8 | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+sse4.2"` | +| wasm32 | all | (not tested) | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+simd128"` | +| armv7 | thumbv7neon‑\* | (not tested) | works out of the box | +| armv7 | others | (not tested) | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+neon"` | +| other | ... | bad | falls back to `core::str::from_utf8` unless `force_simd128` or `force_simd256` are used. Check [`swizzle_dyn` support](https://github.com/rust-lang/rust/blob/master/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs) before forcing. 
| + +\*) using `basic::from_utf8` + +## Quick start + +Add the dependency to your Cargo.toml file: + +```toml +[dependencies] +simdutf8-portable = "0.0.1" +``` + +Use `simdutf8_portable::basic::from_utf8()` as a drop-in replacement for `std::str::from_utf8()`. + +```rust +use simdutf8_portable::basic::from_utf8; + +println!("{}", from_utf8(b"I \xE2\x9D\xA4\xEF\xB8\x8F UTF-8!").unwrap()); +``` + +If you need detailed information on validation failures, use `simdutf8_portable::compat::from_utf8()` +instead. + +```rust +use simdutf8_portable::compat::from_utf8; + +let err = from_utf8(b"I \xE2\x9D\xA4\xEF\xB8 UTF-8!").unwrap_err(); +assert_eq!(err.valid_up_to(), 5); +assert_eq!(err.error_len(), Some(2)); +``` + +## APIs + +See docs or [simdutf8](https://crates.io/crates/simdutf8). + +## Minimum Supported Rust Version (MSRV) + +Rust nightly as of xx xx + +## Thanks + +- to [Heinz N. Gies](https://github.com/licenser) for the initial portable SIMD implementation, +- to the authors of the `core::simd` module, +- to the authors of simdjson for coming up with the high-performance SIMD implementation, +- to the authors of the simdjson Rust port who did most of the initial heavy lifting of porting the + C++ code to Rust. + +## License + +This code is dual-licensed under the +[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0.html) and the +[MIT License](https://opensource.org/licenses/MIT). + +It is based on code distributed with simd-json.rs, the Rust port of simdjson, which is dual-licensed +under the MIT license and Apache 2.0 license as well. + +simdjson itself is distributed under the Apache License 2.0. 
+ +## References + +John Keiser, Daniel Lemire, +[Validating UTF-8 In Less Than One Instruction Per Byte](https://arxiv.org/abs/2010.03090), +Software: Practice and Experience 51 (5), 2021 diff --git a/portable/baseline/basic.s b/portable/baseline/basic.s new file mode 100644 index 00000000..8ebeaa21 --- /dev/null +++ b/portable/baseline/basic.s @@ -0,0 +1,370 @@ +.section __TEXT,__text,regular,pure_instructions + .globl simdutf8_portable::implementation::validate_utf8_basic_simd + .p2align 2 +simdutf8_portable::implementation::validate_utf8_basic_simd: +Lfunc_begin4: + .cfi_startproc + stp x29, x30, [sp, #-16]! + .cfi_def_cfa_offset 16 + mov x29, sp + .cfi_def_cfa w29, 16 + .cfi_offset w30, -8 + .cfi_offset w29, -16 + sub x9, sp, #144 + and sp, x9, #0xffffffffffffffe0 + mov x8, x0 + mov x11, #0 + and x9, x1, #0xffffffffffffffc0 + mov x10, x9 +LBB4_1: + cmp x11, x9 + b.hs LBB4_8 + add x12, x8, x11 + ldp q18, q17, [x12] + ldp q16, q7, [x12, #32] + add x11, x11, #64 + orr.16b v0, v17, v18 + orr.16b v1, v16, v7 + orr.16b v0, v0, v1 + umaxv.16b b0, v0 + fmov w12, s0 + tbz w12, #7, LBB4_1 + movi.2d v4, #0000000000000000 + ext.16b v2, v4, v18, #15 + ushr.16b v1, v2, #4 +Lloh24: + adrp x10, lCPI4_0@PAGE +Lloh25: + ldr q0, [x10, lCPI4_0@PAGEOFF] + tbl.16b v5, { v0 }, v1 + movi.16b v1, #15 + and.16b v3, v2, v1 +Lloh26: + adrp x10, lCPI4_1@PAGE +Lloh27: + ldr q2, [x10, lCPI4_1@PAGEOFF] + tbl.16b v6, { v2 }, v3 + ushr.16b v19, v18, #4 +Lloh28: + adrp x10, lCPI4_2@PAGE +Lloh29: + ldr q3, [x10, lCPI4_2@PAGEOFF] + tbl.16b v19, { v3 }, v19 + and.16b v5, v6, v5 + and.16b v19, v5, v19 + ext.16b v5, v4, v18, #14 + ext.16b v6, v4, v18, #13 + movi.16b v4, #223 + cmhi.16b v20, v5, v4 + movi.16b v5, #239 + cmhi.16b v6, v6, v5 + orr.16b v20, v6, v20 + movi.16b v6, #128 + and.16b v20, v20, v6 + eor.16b v19, v19, v20 + ext.16b v20, v18, v17, #15 + ushr.16b v21, v20, #4 + tbl.16b v21, { v0 }, v21 + and.16b v20, v20, v1 + tbl.16b v20, { v2 }, v20 + ushr.16b v22, v17, #4 + tbl.16b v22, { v3 
}, v22 + and.16b v20, v20, v21 + and.16b v20, v20, v22 + ext.16b v21, v18, v17, #14 + ext.16b v18, v18, v17, #13 + cmhi.16b v21, v21, v4 + cmhi.16b v18, v18, v5 + orr.16b v18, v18, v21 + and.16b v18, v18, v6 + eor.16b v18, v20, v18 + orr.16b v18, v18, v19 + ext.16b v19, v17, v16, #15 + ushr.16b v20, v19, #4 + tbl.16b v20, { v0 }, v20 + and.16b v19, v19, v1 + tbl.16b v19, { v2 }, v19 + ushr.16b v21, v16, #4 + tbl.16b v21, { v3 }, v21 + and.16b v19, v19, v20 + and.16b v19, v19, v21 + ext.16b v20, v17, v16, #14 + ext.16b v17, v17, v16, #13 + cmhi.16b v20, v20, v4 + cmhi.16b v17, v17, v5 + orr.16b v17, v17, v20 + and.16b v17, v17, v6 + eor.16b v17, v19, v17 + ext.16b v19, v16, v7, #15 + ushr.16b v20, v19, #4 + tbl.16b v20, { v0 }, v20 + and.16b v19, v19, v1 + tbl.16b v19, { v2 }, v19 + ushr.16b v21, v7, #4 + tbl.16b v21, { v3 }, v21 + and.16b v19, v19, v20 + and.16b v19, v19, v21 + ext.16b v20, v16, v7, #14 + ext.16b v16, v16, v7, #13 + cmhi.16b v20, v20, v4 + cmhi.16b v16, v16, v5 + orr.16b v16, v16, v20 + and.16b v16, v16, v6 + eor.16b v16, v19, v16 + orr.16b v16, v17, v16 + orr.16b v23, v18, v16 +Lloh30: + adrp x10, lCPI4_3@PAGE +Lloh31: + ldr q16, [x10, lCPI4_3@PAGEOFF] + uqsub.16b v19, v7, v16 + cmp x11, x9 + b.hs LBB4_12 + mov x10, x11 + b LBB4_6 +LBB4_5: + ext.16b v19, v7, v20, #15 + ushr.16b v21, v19, #4 + tbl.16b v21, { v0 }, v21 + and.16b v19, v19, v1 + tbl.16b v19, { v2 }, v19 + ushr.16b v22, v20, #4 + tbl.16b v22, { v3 }, v22 + and.16b v19, v19, v21 + and.16b v19, v19, v22 + ext.16b v21, v7, v20, #14 + ext.16b v7, v7, v20, #13 + cmhi.16b v21, v21, v4 + cmhi.16b v7, v7, v5 + orr.16b v7, v7, v21 + and.16b v7, v7, v6 + eor.16b v7, v19, v7 + ext.16b v19, v20, v18, #15 + ushr.16b v21, v19, #4 + tbl.16b v21, { v0 }, v21 + and.16b v19, v19, v1 + tbl.16b v19, { v2 }, v19 + ushr.16b v22, v18, #4 + tbl.16b v22, { v3 }, v22 + and.16b v19, v19, v21 + and.16b v19, v19, v22 + ext.16b v21, v20, v18, #14 + ext.16b v20, v20, v18, #13 + cmhi.16b v21, v21, v4 + cmhi.16b v20, 
v20, v5 + orr.16b v20, v20, v21 + and.16b v20, v20, v6 + eor.16b v19, v19, v20 + ext.16b v20, v18, v17, #15 + ushr.16b v21, v20, #4 + tbl.16b v21, { v0 }, v21 + and.16b v20, v20, v1 + tbl.16b v20, { v2 }, v20 + ushr.16b v22, v17, #4 + tbl.16b v22, { v3 }, v22 + and.16b v20, v20, v21 + and.16b v20, v20, v22 + ext.16b v21, v18, v17, #14 + ext.16b v18, v18, v17, #13 + cmhi.16b v21, v21, v4 + cmhi.16b v18, v18, v5 + orr.16b v18, v18, v21 + and.16b v18, v18, v6 + eor.16b v18, v20, v18 + ext.16b v20, v17, v24, #15 + ushr.16b v21, v20, #4 + tbl.16b v21, { v0 }, v21 + and.16b v20, v20, v1 + tbl.16b v20, { v2 }, v20 + ushr.16b v22, v24, #4 + tbl.16b v22, { v3 }, v22 + and.16b v20, v20, v21 + and.16b v20, v20, v22 + ext.16b v21, v17, v24, #14 + ext.16b v17, v17, v24, #13 + cmhi.16b v21, v21, v4 + cmhi.16b v17, v17, v5 + orr.16b v17, v17, v21 + and.16b v17, v17, v6 + eor.16b v17, v20, v17 + orr.16b v7, v19, v7 + orr.16b v17, v18, v17 + orr.16b v19, v7, v17 + mov.16b v7, v24 + uqsub.16b v17, v24, v16 + orr.16b v23, v19, v23 + add x10, x10, #64 + mov.16b v19, v17 + cmp x10, x9 + b.hs LBB4_9 +LBB4_6: + add x11, x8, x10 + ldp q20, q18, [x11] + ldp q17, q24, [x11, #32] + orr.16b v21, v18, v20 + orr.16b v22, v17, v24 + orr.16b v21, v21, v22 + umaxv.16b b21, v21 + fmov w11, s21 + tbnz w11, #7, LBB4_5 + mov.16b v17, v19 + mov.16b v24, v7 + orr.16b v23, v19, v23 + add x10, x10, #64 + mov.16b v19, v17 + cmp x10, x9 + b.lo LBB4_6 + b LBB4_9 +LBB4_8: + movi.2d v23, #0000000000000000 + movi.2d v17, #0000000000000000 + movi.2d v24, #0000000000000000 +LBB4_9: + subs x2, x1, x10 + b.ls LBB4_14 +LBB4_10: + stp q24, q17, [sp, #16] + movi.2d v0, #0000000000000000 + stp q0, q0, [sp, #96] + str q0, [sp, #80] + stp q23, q0, [sp, #48] + add x0, sp, #64 + add x1, x8, x10 + bl _memcpy + ldp q3, q2, [sp, #64] + ldp q1, q0, [sp, #96] + orr.16b v4, v2, v3 + orr.16b v5, v1, v0 + orr.16b v4, v4, v5 + umaxv.16b b4, v4 + fmov w8, s4 + tbnz w8, #7, LBB4_13 + ldp q17, q23, [sp, #32] + orr.16b v23, v17, v23 + 
b LBB4_14 +LBB4_12: + mov.16b v17, v19 + mov.16b v24, v7 + mov x10, x11 + subs x2, x1, x11 + b.hi LBB4_10 + b LBB4_14 +LBB4_13: + ldr q19, [sp, #16] + ext.16b v4, v19, v3, #15 + ushr.16b v5, v4, #4 +Lloh32: + adrp x8, lCPI4_0@PAGE +Lloh33: + ldr q6, [x8, lCPI4_0@PAGEOFF] + tbl.16b v5, { v6 }, v5 + movi.16b v7, #15 + and.16b v4, v4, v7 +Lloh34: + adrp x8, lCPI4_1@PAGE +Lloh35: + ldr q16, [x8, lCPI4_1@PAGEOFF] + tbl.16b v4, { v16 }, v4 + ushr.16b v17, v3, #4 +Lloh36: + adrp x8, lCPI4_2@PAGE +Lloh37: + ldr q18, [x8, lCPI4_2@PAGEOFF] + tbl.16b v17, { v18 }, v17 + and.16b v4, v4, v5 + and.16b v4, v4, v17 + ext.16b v5, v19, v3, #14 + ext.16b v17, v19, v3, #13 + movi.16b v19, #223 + cmhi.16b v5, v5, v19 + movi.16b v20, #239 + cmhi.16b v17, v17, v20 + orr.16b v5, v17, v5 + movi.16b v17, #128 + and.16b v5, v5, v17 + eor.16b v4, v4, v5 + ext.16b v5, v3, v2, #15 + ushr.16b v21, v5, #4 + tbl.16b v21, { v6 }, v21 + and.16b v5, v5, v7 + tbl.16b v5, { v16 }, v5 + ushr.16b v22, v2, #4 + tbl.16b v22, { v18 }, v22 + and.16b v5, v5, v21 + and.16b v5, v5, v22 + ext.16b v21, v3, v2, #14 + ext.16b v3, v3, v2, #13 + cmhi.16b v21, v21, v19 + cmhi.16b v3, v3, v20 + orr.16b v3, v3, v21 + and.16b v3, v3, v17 + eor.16b v3, v5, v3 + ext.16b v5, v2, v1, #15 + ushr.16b v21, v5, #4 + tbl.16b v21, { v6 }, v21 + and.16b v5, v5, v7 + tbl.16b v5, { v16 }, v5 + ushr.16b v22, v1, #4 + tbl.16b v22, { v18 }, v22 + and.16b v5, v5, v21 + and.16b v5, v5, v22 + ext.16b v21, v2, v1, #14 + ext.16b v2, v2, v1, #13 + cmhi.16b v21, v21, v19 + cmhi.16b v2, v2, v20 + orr.16b v2, v2, v21 + and.16b v2, v2, v17 + eor.16b v2, v5, v2 + ext.16b v5, v1, v0, #15 + ushr.16b v21, v5, #4 + tbl.16b v6, { v6 }, v21 + and.16b v5, v5, v7 + tbl.16b v5, { v16 }, v5 + ushr.16b v7, v0, #4 + tbl.16b v7, { v18 }, v7 + and.16b v5, v5, v6 + and.16b v5, v5, v7 + ext.16b v6, v1, v0, #14 + ext.16b v1, v1, v0, #13 + cmhi.16b v6, v6, v19 + cmhi.16b v1, v1, v20 + orr.16b v1, v1, v6 + and.16b v1, v1, v17 + eor.16b v1, v5, v1 + orr.16b v3, v4, 
v3 + orr.16b v1, v2, v1 + orr.16b v1, v3, v1 + ldr q23, [sp, #48] + orr.16b v23, v1, v23 +Lloh38: + adrp x8, lCPI4_3@PAGE +Lloh39: + ldr q1, [x8, lCPI4_3@PAGEOFF] + uqsub.16b v17, v0, v1 +LBB4_14: + orr.16b v0, v17, v23 + umaxv.16b b0, v0 + fmov w8, s0 + tst w8, #0xff + cset w0, ne + mov sp, x29 + .cfi_def_cfa wsp, 16 + ldp x29, x30, [sp], #16 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 + ret + .loh AdrpLdr Lloh30, Lloh31 + .loh AdrpAdrp Lloh28, Lloh30 + .loh AdrpLdr Lloh28, Lloh29 + .loh AdrpAdrp Lloh26, Lloh28 + .loh AdrpLdr Lloh26, Lloh27 + .loh AdrpAdrp Lloh24, Lloh26 + .loh AdrpLdr Lloh24, Lloh25 + .loh AdrpLdr Lloh38, Lloh39 + .loh AdrpAdrp Lloh36, Lloh38 + .loh AdrpLdr Lloh36, Lloh37 + .loh AdrpAdrp Lloh34, Lloh36 + .loh AdrpLdr Lloh34, Lloh35 + .loh AdrpAdrp Lloh32, Lloh34 + .loh AdrpLdr Lloh32, Lloh33 diff --git a/portable/baseline/compat.s b/portable/baseline/compat.s new file mode 100644 index 00000000..dcb20098 --- /dev/null +++ b/portable/baseline/compat.s @@ -0,0 +1,432 @@ +.section __TEXT,__text,regular,pure_instructions + .globl simdutf8_portable::implementation::validate_utf8_compat_simd + .p2align 2 +simdutf8_portable::implementation::validate_utf8_compat_simd: +Lfunc_begin5: + .cfi_startproc + sub sp, sp, #224 + .cfi_def_cfa_offset 224 + stp x22, x21, [sp, #176] + stp x20, x19, [sp, #192] + stp x29, x30, [sp, #208] + add x29, sp, #208 + .cfi_def_cfa w29, 16 + .cfi_offset w30, -8 + .cfi_offset w29, -16 + .cfi_offset w19, -24 + .cfi_offset w20, -32 + .cfi_offset w21, -40 + .cfi_offset w22, -48 + .cfi_remember_state + mov x19, x1 + mov x20, x0 + mov x21, #0 + and x9, x1, #0xffffffffffffffc0 + movi.2d v20, #0000000000000000 +Lloh40: + adrp x10, lCPI5_0@PAGE +Lloh41: + ldr q22, [x10, lCPI5_0@PAGEOFF] +Lloh42: + adrp x10, lCPI5_1@PAGE +Lloh43: + ldr q23, [x10, lCPI5_1@PAGEOFF] +Lloh44: + adrp x10, lCPI5_2@PAGE +Lloh45: + ldr q24, [x10, lCPI5_2@PAGEOFF] +Lloh46: + adrp x10, lCPI5_3@PAGE +Lloh47: + ldr q21, [x10, lCPI5_3@PAGEOFF] + mov 
w10, #1 + movi.16b v0, #15 + movi.16b v1, #223 + movi.16b v2, #239 + movi.16b v3, #128 + movi.2d v18, #0000000000000000 + movi.2d v19, #0000000000000000 + cmp x21, x9 + tbz w10, #0, LBB5_4 +LBB5_1: + b.hs LBB5_15 +LBB5_2: + add x11, x20, x21 + ldp q7, q6, [x11] + ldp q5, q4, [x11, #32] + orr.16b v16, v6, v7 + orr.16b v17, v5, v4 + orr.16b v16, v16, v17 + umaxv.16b b16, v16 + fmov w11, s16 + tbnz w11, #7, LBB5_11 + add x21, x21, #64 + cmp x21, x9 + b.lo LBB5_2 + b LBB5_15 + b.hs LBB5_15 + add x11, x20, x21 + ldp q6, q5, [x11] + ldp q4, q7, [x11, #32] + orr.16b v16, v5, v6 + orr.16b v17, v4, v7 + orr.16b v16, v16, v17 + umaxv.16b b16, v16 + fmov w11, s16 + tbnz w11, #7, LBB5_7 + orr.16b v20, v20, v18 + umaxv.16b b4, v20 + fmov w11, s4 + cbz w11, LBB5_13 + b LBB5_29 + ext.16b v17, v19, v6, #15 + ext.16b v18, v19, v6, #14 + ext.16b v16, v19, v6, #13 + mov.16b v19, v7 + ushr.16b v7, v17, #4 + tbl.16b v7, { v22 }, v7 + and.16b v17, v17, v0 + tbl.16b v17, { v23 }, v17 + and.16b v7, v17, v7 + ushr.16b v17, v6, #4 + tbl.16b v17, { v24 }, v17 + and.16b v7, v7, v17 + cmhi.16b v17, v18, v1 + cmhi.16b v16, v16, v2 + orr.16b v16, v16, v17 + and.16b v16, v16, v3 + eor.16b v7, v7, v16 + ext.16b v16, v6, v5, #15 + ushr.16b v17, v16, #4 + tbl.16b v17, { v22 }, v17 + and.16b v16, v16, v0 + tbl.16b v16, { v23 }, v16 + ushr.16b v18, v5, #4 + tbl.16b v18, { v24 }, v18 + and.16b v16, v16, v17 + and.16b v16, v16, v18 + ext.16b v17, v6, v5, #14 + ext.16b v6, v6, v5, #13 + cmhi.16b v17, v17, v1 + cmhi.16b v6, v6, v2 + orr.16b v6, v6, v17 + and.16b v6, v6, v3 + eor.16b v6, v16, v6 + ext.16b v16, v5, v4, #15 + ushr.16b v17, v16, #4 + tbl.16b v17, { v22 }, v17 + and.16b v16, v16, v0 + tbl.16b v16, { v23 }, v16 + ushr.16b v18, v4, #4 + tbl.16b v18, { v24 }, v18 + and.16b v16, v16, v17 + and.16b v16, v16, v18 + ext.16b v17, v5, v4, #14 + ext.16b v5, v5, v4, #13 + cmhi.16b v17, v17, v1 + cmhi.16b v5, v5, v2 + orr.16b v5, v5, v17 + and.16b v5, v5, v3 + eor.16b v5, v16, v5 + ext.16b v16, v4, v19, 
#15 + ushr.16b v17, v16, #4 + tbl.16b v17, { v22 }, v17 + and.16b v16, v16, v0 + tbl.16b v16, { v23 }, v16 + ushr.16b v18, v19, #4 + tbl.16b v18, { v24 }, v18 + and.16b v16, v16, v17 + and.16b v16, v16, v18 + ext.16b v17, v4, v19, #14 + ext.16b v4, v4, v19, #13 + cmhi.16b v17, v17, v1 + cmhi.16b v4, v4, v2 + orr.16b v4, v4, v17 + and.16b v4, v4, v3 + eor.16b v4, v16, v4 + orr.16b v7, v7, v20 + orr.16b v5, v6, v5 + orr.16b v5, v7, v5 + orr.16b v20, v5, v4 + umaxv.16b b4, v20 + fmov w11, s4 + cbnz w11, LBB5_29 + add x11, x21, #64 + cmp x11, x9 + b.hs LBB5_14 + add x12, x20, x21 + ldp q6, q5, [x12, #64] + ldp q4, q7, [x12, #96] + orr.16b v16, v5, v6 + orr.16b v17, v4, v7 + orr.16b v16, v16, v17 + umaxv.16b b16, v16 + fmov w12, s16 + mov x21, x11 + tbnz w12, #7, LBB5_7 + uqsub.16b v18, v19, v21 + mov x21, x11 + orr.16b v20, v20, v18 + umaxv.16b b4, v20 + fmov w11, s4 + cbz w11, LBB5_13 + b LBB5_29 + ext.16b v16, v19, v7, #15 + ushr.16b v17, v16, #4 + tbl.16b v17, { v22 }, v17 + and.16b v16, v16, v0 + tbl.16b v16, { v23 }, v16 + ushr.16b v18, v7, #4 + tbl.16b v18, { v24 }, v18 + and.16b v16, v16, v17 + and.16b v16, v16, v18 + ext.16b v17, v19, v7, #14 + ext.16b v18, v19, v7, #13 + cmhi.16b v17, v17, v1 + cmhi.16b v18, v18, v2 + orr.16b v17, v18, v17 + and.16b v17, v17, v3 + eor.16b v16, v16, v17 + ext.16b v17, v7, v6, #15 + ushr.16b v18, v17, #4 + tbl.16b v18, { v22 }, v18 + and.16b v17, v17, v0 + tbl.16b v17, { v23 }, v17 + ushr.16b v19, v6, #4 + tbl.16b v19, { v24 }, v19 + and.16b v17, v17, v18 + and.16b v17, v17, v19 + ext.16b v18, v7, v6, #14 + ext.16b v7, v7, v6, #13 + cmhi.16b v18, v18, v1 + cmhi.16b v7, v7, v2 + orr.16b v7, v7, v18 + and.16b v7, v7, v3 + eor.16b v7, v17, v7 + ext.16b v17, v6, v5, #15 + ushr.16b v18, v17, #4 + tbl.16b v18, { v22 }, v18 + and.16b v17, v17, v0 + tbl.16b v17, { v23 }, v17 + ushr.16b v19, v5, #4 + tbl.16b v19, { v24 }, v19 + and.16b v17, v17, v18 + and.16b v17, v17, v19 + ext.16b v18, v6, v5, #14 + ext.16b v6, v6, v5, #13 + cmhi.16b 
v18, v18, v1 + cmhi.16b v6, v6, v2 + orr.16b v6, v6, v18 + and.16b v6, v6, v3 + eor.16b v6, v17, v6 + ext.16b v17, v5, v4, #15 + ushr.16b v18, v17, #4 + tbl.16b v18, { v22 }, v18 + and.16b v17, v17, v0 + tbl.16b v17, { v23 }, v17 + ushr.16b v19, v4, #4 + tbl.16b v19, { v24 }, v19 + and.16b v17, v17, v18 + and.16b v17, v17, v19 + ext.16b v18, v5, v4, #14 + ext.16b v5, v5, v4, #13 + cmhi.16b v18, v18, v1 + cmhi.16b v5, v5, v2 + orr.16b v5, v5, v18 + and.16b v5, v5, v3 + eor.16b v5, v17, v5 + orr.16b v7, v16, v7 + orr.16b v5, v6, v5 + orr.16b v5, v7, v5 + orr.16b v20, v5, v20 + umaxv.16b b5, v20 + fmov w11, s5 + cbnz w11, LBB5_29 + uqsub.16b v18, v4, v21 + mov.16b v19, v4 + add x21, x21, #64 + eor w10, w10, #0x1 + cmp x21, x9 + tbz w10, #0, LBB5_4 + b LBB5_1 +LBB5_14: + uqsub.16b v18, v19, v21 + mov x21, x11 +LBB5_15: + subs x2, x19, x21 + b.ls LBB5_24 + movi.2d v0, #0000000000000000 + stp q0, q0, [x29, #-64] + stp q0, q0, [x29, #-96] + add x1, x20, x21 + sub x0, x29, #96 + subs x9, x2, #32 + b.hs LBB5_26 + subs x9, x2, #16 + b.hs LBB5_27 +LBB5_18: + subs x9, x2, #8 + str q20, [sp, #96] + b.hs LBB5_28 +LBB5_19: + cbz x2, LBB5_21 + mov x22, x8 + stp q22, q21, [sp, #64] + stp q24, q23, [sp, #32] + stp q18, q19, [sp] + bl _memcpy + ldp q18, q19, [sp] + ldp q24, q23, [sp, #32] + ldp q22, q21, [sp, #64] + mov x8, x22 +LBB5_21: + ldp q3, q2, [x29, #-96] + ldp q1, q0, [x29, #-64] + orr.16b v4, v2, v3 + orr.16b v5, v1, v0 + orr.16b v4, v4, v5 + umaxv.16b b4, v4 + fmov w9, s4 + mov.16b v4, v18 + tbz w9, #7, LBB5_23 + ext.16b v4, v19, v3, #15 + ushr.16b v5, v4, #4 + tbl.16b v5, { v22 }, v5 + movi.16b v6, #15 + and.16b v4, v4, v6 + tbl.16b v4, { v23 }, v4 + ushr.16b v7, v3, #4 + tbl.16b v7, { v24 }, v7 + and.16b v4, v4, v5 + and.16b v4, v4, v7 + ext.16b v5, v19, v3, #14 + ext.16b v7, v19, v3, #13 + movi.16b v16, #223 + cmhi.16b v5, v5, v16 + movi.16b v17, #239 + cmhi.16b v7, v7, v17 + orr.16b v5, v7, v5 + movi.16b v7, #128 + and.16b v5, v5, v7 + eor.16b v4, v4, v5 + ext.16b v5, 
v3, v2, #15 + ushr.16b v18, v5, #4 + tbl.16b v18, { v22 }, v18 + and.16b v5, v5, v6 + tbl.16b v5, { v23 }, v5 + ushr.16b v19, v2, #4 + tbl.16b v19, { v24 }, v19 + and.16b v5, v5, v18 + and.16b v5, v5, v19 + ext.16b v18, v3, v2, #14 + ext.16b v3, v3, v2, #13 + cmhi.16b v18, v18, v16 + cmhi.16b v3, v3, v17 + orr.16b v3, v3, v18 + and.16b v3, v3, v7 + eor.16b v3, v5, v3 + orr.16b v3, v3, v4 + ext.16b v4, v2, v1, #15 + ushr.16b v5, v4, #4 + tbl.16b v5, { v22 }, v5 + and.16b v4, v4, v6 + tbl.16b v4, { v23 }, v4 + ushr.16b v18, v1, #4 + tbl.16b v18, { v24 }, v18 + and.16b v4, v4, v5 + and.16b v4, v4, v18 + ext.16b v5, v2, v1, #14 + ext.16b v2, v2, v1, #13 + cmhi.16b v5, v5, v16 + cmhi.16b v2, v2, v17 + orr.16b v2, v2, v5 + and.16b v2, v2, v7 + eor.16b v2, v4, v2 + ext.16b v4, v1, v0, #15 + ushr.16b v5, v4, #4 + tbl.16b v5, { v22 }, v5 + and.16b v4, v4, v6 + tbl.16b v4, { v23 }, v4 + ushr.16b v6, v0, #4 + tbl.16b v6, { v24 }, v6 + and.16b v4, v4, v5 + and.16b v4, v4, v6 + ext.16b v5, v1, v0, #14 + ext.16b v1, v1, v0, #13 + cmhi.16b v5, v5, v16 + cmhi.16b v1, v1, v17 + orr.16b v1, v1, v5 + and.16b v1, v1, v7 + eor.16b v1, v4, v1 + orr.16b v1, v2, v1 + orr.16b v18, v3, v1 + uqsub.16b v4, v0, v21 + ldr q20, [sp, #96] + orr.16b v20, v18, v20 + mov.16b v18, v4 +LBB5_24: + orr.16b v0, v18, v20 + umaxv.16b b0, v0 + fmov w9, s0 + cbnz w9, LBB5_29 + mov w9, #2 + strb w9, [x8, #8] + .cfi_def_cfa wsp, 224 + ldp x29, x30, [sp, #208] + ldp x20, x19, [sp, #192] + ldp x22, x21, [sp, #176] + add sp, sp, #224 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 + .cfi_restore w19 + .cfi_restore w20 + .cfi_restore w21 + .cfi_restore w22 + ret +LBB5_26: + .cfi_restore_state + ldp q0, q1, [x1], #32 + stp q0, q1, [x29, #-96] + add x0, x0, #32 + mov x2, x9 + subs x9, x9, #16 + b.lo LBB5_18 +LBB5_27: + ldr q0, [x1], #16 + str q0, [x0], #16 + mov x2, x9 + subs x9, x9, #8 + str q20, [sp, #96] + b.lo LBB5_19 +LBB5_28: + ldr x10, [x1], #8 + str x10, [x0], #8 + mov x2, x9 + cbnz x9, LBB5_20 
+ b LBB5_21 +LBB5_29: + mov x0, x8 + mov x1, x20 + mov x2, x19 + mov x3, x21 + .cfi_def_cfa wsp, 224 + ldp x29, x30, [sp, #208] + ldp x20, x19, [sp, #192] + ldp x22, x21, [sp, #176] + add sp, sp, #224 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 + .cfi_restore w19 + .cfi_restore w20 + .cfi_restore w21 + .cfi_restore w22 + b simdutf8_portable::implementation::helpers::get_compat_error + .loh AdrpLdr Lloh46, Lloh47 + .loh AdrpAdrp Lloh44, Lloh46 + .loh AdrpLdr Lloh44, Lloh45 + .loh AdrpAdrp Lloh42, Lloh44 + .loh AdrpLdr Lloh42, Lloh43 + .loh AdrpAdrp Lloh40, Lloh42 + .loh AdrpLdr Lloh40, Lloh41 diff --git a/portable/src/basic.rs b/portable/src/basic.rs new file mode 100644 index 00000000..b99107bd --- /dev/null +++ b/portable/src/basic.rs @@ -0,0 +1,230 @@ +//! The `basic` API flavor provides barebones UTF-8 checking at the highest speed. +//! +//! It is fastest on valid UTF-8, but only checks for errors after processing the whole byte sequence +//! and does not provide detailed information if the data is not valid UTF-8. [`Utf8Error`] is a zero-sized error struct. +//! +//! If you need detailed error information use the functions from the [`crate::compat`] module instead. + +use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut}; + +use crate::implementation::validate_utf8_basic; + +/// Simple zero-sized UTF-8 error. +/// +/// No information is provided where the error occurred or how long the invalid byte +/// byte sequence is. +#[derive(Copy, Eq, PartialEq, Clone, Debug)] +pub struct Utf8Error; + +impl core::fmt::Display for Utf8Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("invalid utf-8 sequence") + } +} + +impl core::error::Error for Utf8Error {} + +/// Analogue to [`std::str::from_utf8()`]. +/// +/// Checks if the passed byte sequence is valid UTF-8 and returns an +/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is. 
+/// +/// # Errors +/// Will return the zero-sized Err([`Utf8Error`]) on if the input contains invalid UTF-8. +#[inline] +pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> { + unsafe { + validate_utf8_basic(input)?; + Ok(from_utf8_unchecked(input)) + } +} + +/// Analogue to [`std::str::from_utf8_mut()`]. +/// +/// Checks if the passed mutable byte sequence is valid UTF-8 and returns a mutable +/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is. +/// +/// # Errors +/// Will return the zero-sized Err([`Utf8Error`]) on if the input contains invalid UTF-8. +#[inline] +pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> { + unsafe { + validate_utf8_basic(input)?; + Ok(from_utf8_unchecked_mut(input)) + } +} + +/// Allows direct access to the platform-specific unsafe validation implementations. +#[cfg(feature = "public_imp")] +pub mod imp { + use crate::basic; + + /// A low-level interface for streaming validation of UTF-8 data. It is meant to be integrated + /// in high-performance data processing pipelines. + /// + /// Data can be streamed in arbitrarily-sized chunks using the [`Self::update()`] method. There is + /// no way to find out if the input so far was valid UTF-8 during the validation. Only when + /// the validation is completed with the [`Self::finalize()`] method the result of the validation is + /// returned. Use [`ChunkedUtf8Validator`] if possible for highest performance. + /// + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to use it if the required CPU features are not available which + /// is why all trait methods are `unsafe`. 
+ /// + /// General usage: + /// ```rust + /// use simdutf8_portable::basic::imp::Utf8Validator; + /// use std::io::{stdin, Read, Result}; + /// + /// # #[cfg(target_arch = "x86_64")] + /// fn main() -> Result<()> { + /// unsafe { + /// if !std::is_x86_feature_detected!("avx2") { + /// panic!("This example only works with CPUs supporting AVX 2"); + /// } + /// + /// let mut validator = simdutf8::basic::imp::x86::avx2::Utf8ValidatorImp::new(); + /// let mut buf = vec![0; 8192]; + /// loop { + /// let bytes_read = stdin().read(buf.as_mut())?; + /// if bytes_read == 0 { + /// break; + /// } + /// validator.update(&buf); + /// } + /// + /// if validator.finalize().is_ok() { + /// println!("Input is valid UTF-8"); + /// } else { + /// println!("Input is not valid UTF-8"); + /// } + /// } + /// + /// Ok(()) + /// } + /// + /// # #[cfg(not(target_arch = "x86_64"))] + /// # fn main() { } + /// ``` + /// + pub trait Utf8Validator { + /// Creates a new validator. + /// + /// # Safety + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not available. + #[must_use] + fn new() -> Self + where + Self: Sized; + + /// Updates the validator with `input`. + /// + /// # Safety + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not available. + fn update(&mut self, input: &[u8]); + + /// Finishes the validation and returns `Ok(())` if the input was valid UTF-8. + /// + /// # Errors + /// A [`basic::Utf8Error`] is returned if the input was not valid UTF-8. No + /// further information about the location of the error is provided. + /// + /// # Safety + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not available. 
+ fn finalize(self) -> core::result::Result<(), basic::Utf8Error>; + } + + /// Like [`Utf8Validator`] this low-level API is for streaming validation of UTF-8 data. + /// + /// It has additional restrictions imposed on how the input is passed in to allow + /// validation with as little overhead as possible. + /// + /// To feed it data you need to call the [`Self::update_from_chunks()`] method which takes slices which + /// have to be a multiple of 64 bytes long. The method will panic otherwise. There is + /// no way to find out if the input so far was valid UTF-8 during the validation. Only when + /// the validation is completed with the [`Self::finalize()`] method the result of the validation is + /// returned. + /// + /// The `Self::finalize()` method can be fed the rest of the data. There is no restriction on the + /// data passed to it. + /// + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to use it if the required CPU features are not available which + /// is why all trait methods are `unsafe`. + pub trait ChunkedUtf8Validator { + /// Creates a new validator. + /// + /// # Safety + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not available. + #[must_use] + fn new() -> Self + where + Self: Sized; + + /// Updates the validator with `input`. + /// + /// # Panics + /// If `input.len()` is not a multiple of 64. + /// + /// # Safety + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not available. + fn update_from_chunks(&mut self, input: &[u8]); + + /// Updates the validator with remaining input if any. There is no restriction on the + /// data provided. + /// + /// Finishes the validation and returns `Ok(())` if the input was valid UTF-8. 
+ /// + /// # Errors + /// A [`basic::Utf8Error`] is returned if the input was not valid UTF-8. No + /// further information about the location of the error is provided. + /// + /// # Safety + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not available. + fn finalize( + self, + remaining_input: core::option::Option<&[u8]>, + ) -> core::result::Result<(), basic::Utf8Error>; + } + + /// Best for current target as defined by compile-time arch and target features. If no fast + /// SIMD implementation is available, the scalar implementation from the standard library is + /// used as a fallback. + /// + /// However, the crate feature `force_nonsimd` forces the fallback implementation, `force_simd128` + /// forces the 128-bit SIMD implementation and `force_simd256` forces the 256-bit SIMD implementation, + /// in order of precedence. + /// + pub mod auto { + pub use crate::implementation::auto::validate_utf8_basic as validate_utf8; + pub use crate::implementation::auto::ChunkedUtf8ValidatorImp; + pub use crate::implementation::auto::Utf8ValidatorImp; + } + + /// Includes the scalar fallback implementation using 128-bit portable SIMD. + pub mod fallback { + pub use crate::implementation::fallback::validate_utf8_basic as validate_utf8; + pub use crate::implementation::fallback::ChunkedUtf8ValidatorImp; + pub use crate::implementation::fallback::Utf8ValidatorImp; + } + + /// Includes the validation implementation using 128-bit portable SIMD. + pub mod v128 { + pub use crate::implementation::simd::v128::validate_utf8_basic as validate_utf8; + pub use crate::implementation::simd::v128::ChunkedUtf8ValidatorImp; + pub use crate::implementation::simd::v128::Utf8ValidatorImp; + } + + /// Includes the validation implementation using 256-bit portable SIMD. 
+ pub mod v256 { + pub use crate::implementation::simd::v256::validate_utf8_basic as validate_utf8; + pub use crate::implementation::simd::v256::ChunkedUtf8ValidatorImp; + pub use crate::implementation::simd::v256::Utf8ValidatorImp; + } +} diff --git a/portable/src/compat.rs b/portable/src/compat.rs new file mode 100644 index 00000000..ce3e62d9 --- /dev/null +++ b/portable/src/compat.rs @@ -0,0 +1,130 @@ +//! The `compat` API flavor provides full compatibility with [`std::str::from_utf8()`] and detailed validation errors. +//! +//! In particular, [`from_utf8()`] +//! returns an [`Utf8Error`], which has the [`valid_up_to()`](Utf8Error#method.valid_up_to) and +//! [`error_len()`](Utf8Error#method.error_len) methods. The first is useful for verification of streamed data. The +//! second is useful e.g. for replacing invalid byte sequences with a replacement character. +//! +//! The functions in this module also fail early: errors are checked on-the-fly as the string is processed and once +//! an invalid UTF-8 sequence is encountered, it returns without processing the rest of the data. +//! This comes at a slight performance penalty compared to the [`crate::basic`] module if the input is valid UTF-8. + +use core::fmt::Display; +use core::fmt::Formatter; + +use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut}; + +use crate::implementation::validate_utf8_compat; + +/// UTF-8 error information compatible with [`std::str::Utf8Error`]. +/// +/// Contains information on the location of the encountered validation error and the length of the +/// invalid UTF-8 sequence. +#[derive(Copy, Eq, PartialEq, Clone, Debug)] +pub struct Utf8Error { + pub(crate) valid_up_to: usize, + pub(crate) error_len: Option, +} + +impl Utf8Error { + /// Analogue to [`std::str::Utf8Error::valid_up_to()`](std::str::Utf8Error#method.valid_up_to). + /// + /// ... 
+ #[inline] + #[must_use] + pub const fn valid_up_to(&self) -> usize { + self.valid_up_to + } + + /// Analogue to [`std::str::Utf8Error::error_len()`](std::str::Utf8Error#method.error_len). + /// + /// ... + #[inline] + #[must_use] + pub fn error_len(&self) -> Option { + self.error_len.map(|len| len as usize) + } +} + +impl Display for Utf8Error { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + if let Some(error_len) = self.error_len { + write!( + f, + "invalid utf-8 sequence of {} bytes from index {}", + error_len, self.valid_up_to + ) + } else { + write!( + f, + "incomplete utf-8 byte sequence from index {}", + self.valid_up_to + ) + } + } +} + +impl core::error::Error for Utf8Error {} + +/// Analogue to [`std::str::from_utf8()`]. +/// +/// Checks if the passed byte sequence is valid UTF-8 and returns an +/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is. +/// +/// # Errors +/// Will return Err([`Utf8Error`]) on if the input contains invalid UTF-8 with +/// detailed error information. +#[inline] +pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> { + unsafe { + validate_utf8_compat(input)?; + Ok(from_utf8_unchecked(input)) + } +} + +/// Analogue to [`std::str::from_utf8_mut()`]. +/// +/// Checks if the passed mutable byte sequence is valid UTF-8 and returns a mutable +/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is. +/// +/// # Errors +/// Will return Err([`Utf8Error`]) on if the input contains invalid UTF-8 with +/// detailed error information. +#[inline] +pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> { + unsafe { + validate_utf8_compat(input)?; + Ok(from_utf8_unchecked_mut(input)) + } +} + +/// Allows direct access to the platform-specific unsafe validation implementations. +#[cfg(feature = "public_imp")] +pub mod imp { + /// Best for current target as defined by compile-time arch and target features. 
If no fast + /// SIMD implementation is available, the scalar implementation from the standard library is + /// used as a fallback. + /// + /// However, the crate feature `force_nonsimd` forces the fallback implementation, `force_simd128` + /// forces the 128-bit SIMD implementation and `force_simd256` forces the 256-bit SIMD implementation, + /// in order of precedence. + /// + pub mod auto { + pub use crate::implementation::auto::validate_utf8_compat as validate_utf8; + } + + /// Includes the scalar fallback implementation using 128-bit portable SIMD. + pub mod fallback { + pub use crate::implementation::fallback::validate_utf8_compat as validate_utf8; + } + + /// Includes the validation implementation for 128-bit portable SIMD. + pub mod v128 { + pub use crate::implementation::simd::v128::validate_utf8_compat as validate_utf8; + } + + /// Includes the validation implementation for 256-bit portable SIMD. + pub mod v256 { + pub use crate::implementation::simd::v256::validate_utf8_compat as validate_utf8; + } +} diff --git a/portable/src/implementation/fallback.rs b/portable/src/implementation/fallback.rs new file mode 100644 index 00000000..95f03755 --- /dev/null +++ b/portable/src/implementation/fallback.rs @@ -0,0 +1,147 @@ +/// Fallback implementation using the standard library. +/// +/// # Errors +/// Returns the zero-sized [`basic::Utf8Error`] on failure. +#[inline] +pub const fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { + match core::str::from_utf8(input) { + Ok(_) => Ok(()), + Err(_) => Err(crate::basic::Utf8Error {}), + } +} + +/// Fallback implementation using the standard library. +/// +/// # Errors +/// Returns [`compat::Utf8Error`] with detailed error information on failure. +#[inline] +pub fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { + super::validate_utf8_at_offset(input, 0) +} + +/// Low-level implementation of the [`basic::imp::Utf8Validator`] trait. 
+/// +/// This is implementation requires CPU SIMD features specified by the module it resides in. +/// It is undefined behavior to call it if the required CPU features are not +/// available. +#[cfg(feature = "public_imp")] +pub struct Utf8ValidatorImp { + expected_cont_bytes: u8, + err: bool, +} + +#[cfg(feature = "public_imp")] +pub use Utf8ValidatorImp as ChunkedUtf8ValidatorImp; + +#[cfg(feature = "public_imp")] +impl Utf8ValidatorImp { + #[inline] + fn update(&mut self, mut input: &[u8]) { + if self.err { + return; + } + if self.expected_cont_bytes > 0 { + let to_check = (self.expected_cont_bytes as usize).min(input.len()); + for b in &input[..to_check] { + if b & 0b1100_0000 != 0b1000_0000 { + // not a continuation byte + self.err = true; + return; + } + self.expected_cont_bytes -= 1; + } + if self.expected_cont_bytes > 0 { + // not enough continuation bytes + return; + } + input = &input[to_check..]; + } + if let Err(e) = core::str::from_utf8(input) { + // cannot wrap, since there is at least one byte left which is not valid UTF-8 + // by itself + self.expected_cont_bytes = match input[e.valid_up_to()] { + 0b1100_0000..0b1110_0000 => 1, + 0b1110_0000..0b1111_0000 => 2, + 0b1111_0000..0b1111_1000 => 3, + _ => { + // invalid byte for starting sequence + self.err = true; + return; + } + }; + let rem_input = &input[e.valid_up_to() + 1..]; + let rem_input = &rem_input[0..rem_input.len().min(self.expected_cont_bytes as usize)]; + for b in rem_input { + if b & 0b1100_0000 != 0b1000_0000 { + // not a continuation byte + self.err = true; + return; + } + self.expected_cont_bytes -= 1; + } + debug_assert!(self.expected_cont_bytes > 0); // otherwise from_utf8 would not have errored + } + } + + #[inline] + const fn finalize(self) -> core::result::Result<(), crate::basic::Utf8Error> { + if self.err || self.expected_cont_bytes > 0 { + Err(crate::basic::Utf8Error {}) + } else { + Ok(()) + } + } +} + +#[cfg(feature = "public_imp")] +impl crate::basic::imp::Utf8Validator for 
Utf8ValidatorImp { + #[inline] + #[must_use] + fn new() -> Self { + Self { + expected_cont_bytes: 0, + err: false, + } + } + + #[inline] + fn update(&mut self, input: &[u8]) { + if input.is_empty() { + return; + } + self.update(input); + } + + #[inline] + fn finalize(self) -> core::result::Result<(), crate::basic::Utf8Error> { + self.finalize() + } +} + +#[cfg(feature = "public_imp")] +impl crate::basic::imp::ChunkedUtf8Validator for Utf8ValidatorImp { + #[inline] + #[must_use] + fn new() -> Self { + Self { + expected_cont_bytes: 0, + err: false, + } + } + + #[inline] + fn update_from_chunks(&mut self, input: &[u8]) { + self.update(input); + } + + #[inline] + fn finalize( + mut self, + remaining_input: core::option::Option<&[u8]>, + ) -> core::result::Result<(), crate::basic::Utf8Error> { + if let Some(remaining_input) = remaining_input { + self.update(remaining_input); + } + self.finalize() + } +} diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs new file mode 100644 index 00000000..cb26a1a0 --- /dev/null +++ b/portable/src/implementation/mod.rs @@ -0,0 +1,93 @@ +//! Contains UTF-8 validation implementations. + +#![forbid(unsafe_code)] + +pub(crate) mod fallback; + +#[allow(unused)] +pub(crate) mod simd; + +cfg_if::cfg_if! 
{ + if #[cfg(feature = "force_fallback")] { + pub(crate) use fallback as auto; + } else if #[cfg(feature = "force_simd128")] { + pub(crate) use simd::v128 as auto; + } else if #[cfg(feature = "force_simd256")] { + pub(crate) use simd::v256 as auto; + // known good configurations + } else if #[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "avx2" + ))] { + pub(crate) use simd::v256 as auto; + } else if #[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "sse4.2" + ))] { + pub(crate) use simd::v128 as auto; + } else if #[cfg(all( + target_arch = "aarch64", + target_feature = "neon" + ))] { + pub(crate) use simd::v128 as auto; + } else if #[cfg(all( + target_arch = "arm", + target_endian = "little", + target_feature = "v7", + target_feature = "neon" + ))] { + pub(crate) use simd::v128 as auto; + } else if #[cfg(all( + target_arch = "wasm32", + target_feature = "simd128" + ))] { + pub(crate) use simd::v128 as auto; + } else { + pub(crate) use fallback as auto; + } +} + +#[inline] +pub(crate) fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { + if input.len() < simd::SIMD_CHUNK_SIZE { + return fallback::validate_utf8_basic(input); + } + + validate_utf8_basic_simd(input) +} + +#[inline(never)] +#[allow(clippy::missing_const_for_fn)] +fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { + auto::validate_utf8_basic(input) +} + +#[inline] +pub(crate) fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { + if input.len() < simd::SIMD_CHUNK_SIZE { + return fallback::validate_utf8_compat(input); + } + + validate_utf8_compat_simd(input) +} + +fn validate_utf8_compat_simd(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { + auto::validate_utf8_compat(input) +} + +type Utf8ErrorCompat = crate::compat::Utf8Error; + +#[inline] +#[expect(clippy::cast_possible_truncation)] +fn validate_utf8_at_offset(input: &[u8], offset: usize) -> 
Result<(), Utf8ErrorCompat> { + match core::str::from_utf8(&input[offset..]) { + Ok(_) => Ok(()), + Err(err) => Err(Utf8ErrorCompat { + valid_up_to: err.valid_up_to() + offset, + error_len: err.error_len().map(|len| { + // never truncates since std::str::err::Utf8Error::error_len() never returns value larger than 4 + len as u8 + }), + }), + } +} diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs new file mode 100644 index 00000000..0fa878f5 --- /dev/null +++ b/portable/src/implementation/simd.rs @@ -0,0 +1,1016 @@ +use core::simd::u8x32; +use core::simd::{ + cmp::SimdPartialOrd, + num::{SimdInt, SimdUint}, + simd_swizzle, u8x16, LaneCount, Simd, SupportedLaneCount, +}; + +use crate::basic; + +pub(crate) const SIMD_CHUNK_SIZE: usize = 64; + +#[cfg(all( + any(target_arch = "aarch64", target_arch = "arm"), + target_feature = "neon" +))] +const HAS_FAST_REDUCE_MAX: bool = true; + +#[cfg(not(all( + any(target_arch = "aarch64", target_arch = "arm"), + target_feature = "neon" +)))] +const HAS_FAST_REDUCE_MAX: bool = false; + +const HAS_FAST_MASKED_LOAD: bool = false; // FIXME avx512 + +#[repr(C)] +struct SimdInput +where + LaneCount: SupportedLaneCount, +{ + vals: [Simd; O], +} + +trait SimdInputTrait { + fn new(ptr: &[u8]) -> Self; + fn new_partial_masked_load(slice: &[u8]) -> Self; + fn new_partial_copy(slice: &[u8]) -> Self; + #[inline] + fn new_partial(slice: &[u8]) -> Self + where + Self: Sized, + { + if HAS_FAST_MASKED_LOAD { + Self::new_partial_masked_load(slice) + } else { + Self::new_partial_copy(slice) + } + } + fn is_ascii(&self) -> bool; +} + +impl SimdInputTrait for SimdInput<16, 4> { + #[inline] + fn new(s: &[u8]) -> Self { + assert!(s.len() == 64); + Self { + vals: [ + u8x16::from_slice(&s[..16]), + u8x16::from_slice(&s[16..32]), + u8x16::from_slice(&s[32..48]), + u8x16::from_slice(&s[48..64]), + ], + } + } + + #[inline] + fn new_partial_masked_load(mut slice: &[u8]) -> Self { + let val0 = load_masked_opt(slice); + slice 
= &slice[slice.len().min(16)..]; + if slice.is_empty() { + return Self { + vals: [val0, u8x16::default(), u8x16::default(), u8x16::default()], + }; + } + let val1 = load_masked_opt(slice); + slice = &slice[slice.len().min(16)..]; + if slice.is_empty() { + return Self { + vals: [val0, val1, u8x16::default(), u8x16::default()], + }; + } + let val2 = load_masked_opt(slice); + slice = &slice[slice.len().min(16)..]; + if slice.is_empty() { + return Self { + vals: [val0, val1, val2, u8x16::default()], + }; + } + let val3 = load_masked_opt(slice); + Self { + vals: [val0, val1, val2, val3], + } + } + + #[inline] + fn new_partial_copy(slice: &[u8]) -> Self { + let mut buf = [0; 64]; + buf[..slice.len()].copy_from_slice(slice); + Self::new(&buf) + } + + #[inline] + fn is_ascii(&self) -> bool { + (self.vals[0] | self.vals[1] | self.vals[2] | self.vals[3]).is_ascii() + } +} + +impl SimdInputTrait for SimdInput<32, 2> { + #[inline] + fn new(s: &[u8]) -> Self { + assert!(s.len() == 64); + Self { + vals: [u8x32::from_slice(&s[..32]), u8x32::from_slice(&s[32..64])], + } + } + + #[inline] + fn new_partial_masked_load(mut slice: &[u8]) -> Self { + let val0 = load_masked_opt(slice); + slice = &slice[slice.len().min(32)..]; + if slice.is_empty() { + return Self { + vals: [val0, u8x32::default()], + }; + } + let val1 = load_masked_opt(slice); + Self { vals: [val0, val1] } + } + + #[inline] + fn new_partial_copy(slice: &[u8]) -> Self { + let mut buf = [0; 64]; + buf[..slice.len()].copy_from_slice(slice); + Self::new(&buf) + } + + #[inline] + fn is_ascii(&self) -> bool { + (self.vals[0] | self.vals[1]).is_ascii() + } +} + +#[inline] +fn load_masked_opt(slice: &[u8]) -> Simd +where + LaneCount: SupportedLaneCount, +{ + if slice.len() > N - 1 { + Simd::::from_slice(&slice[..N]) + } else { + Simd::::load_or_default(slice) + } +} + +struct Utf8CheckAlgorithm +where + LaneCount: SupportedLaneCount, +{ + pub(crate) prev: Simd, + pub(crate) incomplete: Simd, // FIXME: could be a mask? 
+ pub(crate) error: Simd, // FIXME: could be a mask? +} + +trait Lookup16 { + #[expect(clippy::too_many_arguments)] + fn lookup_16( + self, + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self; +} + +trait SimdU8Value +where + LaneCount: SupportedLaneCount, + Self: Copy, +{ + #[expect(clippy::too_many_arguments)] + fn from_32_cut_off_leading( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + v16: u8, + v17: u8, + v18: u8, + v19: u8, + v20: u8, + v21: u8, + v22: u8, + v23: u8, + v24: u8, + v25: u8, + v26: u8, + v27: u8, + v28: u8, + v29: u8, + v30: u8, + v31: u8, + ) -> Self; + + #[expect(clippy::too_many_arguments)] + fn repeat_16( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self; + + // const generics would be more awkward and verbose with the current + // portable SIMD swizzle implementation and compiler limitations. 
+ fn prev1(self, prev: Self) -> Self; + fn prev2(self, prev: Self) -> Self; + fn prev3(self, prev: Self) -> Self; + + fn is_ascii(self) -> bool; +} + +impl SimdU8Value<16> for u8x16 { + #[inline] + fn from_32_cut_off_leading( + _v0: u8, + _v1: u8, + _v2: u8, + _v3: u8, + _v4: u8, + _v5: u8, + _v6: u8, + _v7: u8, + _v8: u8, + _v9: u8, + _v10: u8, + _v11: u8, + _v12: u8, + _v13: u8, + _v14: u8, + _v15: u8, + v16: u8, + v17: u8, + v18: u8, + v19: u8, + v20: u8, + v21: u8, + v22: u8, + v23: u8, + v24: u8, + v25: u8, + v26: u8, + v27: u8, + v28: u8, + v29: u8, + v30: u8, + v31: u8, + ) -> Self { + Self::from_array([ + v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + ]) + } + + #[inline] + fn repeat_16( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + Self::from_array([ + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ]) + } + + #[inline] + fn prev1(self, prev: Self) -> Self { + simd_swizzle!( + self, + prev, + [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,] + ) + } + + #[inline] + fn prev2(self, prev: Self) -> Self { + simd_swizzle!( + self, + prev, + [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,] + ) + } + + #[inline] + fn prev3(self, prev: Self) -> Self { + simd_swizzle!( + self, + prev, + [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,] + ) + } + + #[inline] + fn is_ascii(self) -> bool { + if HAS_FAST_REDUCE_MAX { + self.reduce_max() < 0b1000_0000 + } else { + (self & Self::splat(0b1000_0000)) == Self::splat(0) + } + } +} + +impl Lookup16 for Simd +where + Self: SimdU8Value, + LaneCount: SupportedLaneCount, +{ + #[inline] + fn lookup_16( + self, + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + // We need to ensure that 'self' 
only contains the lower 4 bits, unlike the avx instruction + // this will otherwise lead to bad results + let src = Self::repeat_16( + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ); + src.swizzle_dyn(self) + } +} + +impl SimdU8Value<32> for u8x32 { + #[inline] + fn from_32_cut_off_leading( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + v16: u8, + v17: u8, + v18: u8, + v19: u8, + v20: u8, + v21: u8, + v22: u8, + v23: u8, + v24: u8, + v25: u8, + v26: u8, + v27: u8, + v28: u8, + v29: u8, + v30: u8, + v31: u8, + ) -> Self { + Self::from_array([ + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, + v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + ]) + } + + #[inline] + fn repeat_16( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + Self::from_array([ + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, v3, + v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ]) + } + + #[inline] + fn prev1(self, prev: Self) -> Self { + // FIXME? this is more than we actually need. 
Not sure if AVX2 support this + simd_swizzle!( + self, + prev, + [ + 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30 + ] + ) + } + + #[inline] + fn prev2(self, prev: Self) -> Self { + simd_swizzle!( + self, + prev, + [ + 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29 + ] + ) + } + + #[inline] + fn prev3(self, prev: Self) -> Self { + simd_swizzle!( + self, + prev, + [ + 61, 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, + ] + ) + } + + #[inline] + fn is_ascii(self) -> bool { + if HAS_FAST_REDUCE_MAX { + self.reduce_max() < 0b1000_0000 + } else { + (self & Self::splat(0b1000_0000)) == Self::splat(0) + } + } +} + +impl Utf8CheckAlgorithm +where + LaneCount: SupportedLaneCount, + Simd: SimdU8Value, + SimdInput: SimdInputTrait, +{ + #[inline] + fn new() -> Self { + Self { + prev: Simd::::splat(0), + incomplete: Simd::::splat(0), + error: Simd::::splat(0), + } + } + + #[inline] + fn check_incomplete_pending(&mut self) { + self.error |= self.incomplete; + } + + #[inline] + fn is_incomplete(input: Simd) -> Simd { + input.saturating_sub(SimdU8Value::::from_32_cut_off_leading( + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0b1111_0000 - 1, + 0b1110_0000 - 1, + 0b1100_0000 - 1, + )) + } + + #[inline] + fn check_special_cases(input: Simd, prev1: Simd) -> Simd { + const TOO_SHORT: u8 = 1 << 0; + const TOO_LONG: u8 = 1 << 1; + const OVERLONG_3: u8 = 1 << 2; + const SURROGATE: u8 = 1 << 4; + const OVERLONG_2: u8 = 1 << 5; + const TWO_CONTS: u8 = 1 << 7; + const TOO_LARGE: u8 = 1 << 3; + const TOO_LARGE_1000: u8 = 1 << 6; + const OVERLONG_4: u8 = 1 << 6; + const CARRY: u8 = TOO_SHORT | 
TOO_LONG | TWO_CONTS; + + let byte_1_high = (prev1 >> 4).lookup_16( + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TWO_CONTS, + TWO_CONTS, + TWO_CONTS, + TWO_CONTS, + TOO_SHORT | OVERLONG_2, + TOO_SHORT, + TOO_SHORT | OVERLONG_3 | SURROGATE, + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4, + ); + + let byte_1_low = (prev1 & Simd::::splat(0x0F)).lookup_16( + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + CARRY | OVERLONG_2, + CARRY, + CARRY, + CARRY | TOO_LARGE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + ); + + let byte_2_high = (input >> 4).lookup_16( + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + ); + + byte_1_high & byte_1_low & byte_2_high + } + + #[inline] + fn must_be_2_3_continuation(prev2: Simd, prev3: Simd) -> Simd { + let is_third_byte = prev2 + .simd_gt(Simd::::splat(0b1110_0000 - 1)) + .to_int(); + let is_fourth_byte = prev3 + .simd_gt(Simd::::splat(0b1111_0000 - 1)) + .to_int(); + + (is_third_byte | is_fourth_byte).cast() + } + + #[inline] + fn check_multibyte_lengths( + input: Simd, + prev: Simd, + special_cases: Simd, + ) -> Simd { + let prev2 = input.prev2(prev); + let prev3 = input.prev3(prev); + let must23 = 
Self::must_be_2_3_continuation(prev2, prev3); + let must23_80 = must23 & Simd::::splat(0x80); + must23_80 ^ special_cases + } + + #[inline] + fn has_error(&self) -> bool { + // FIXME: max workaround + if HAS_FAST_REDUCE_MAX { + self.error.reduce_max() != 0 + } else { + self.error != Simd::::splat(0) + } + } + + #[inline] + fn check_bytes(&mut self, input: Simd) { + let prev1 = input.prev1(self.prev); + let sc = Self::check_special_cases(input, prev1); + self.error |= Self::check_multibyte_lengths(input, self.prev, sc); + self.prev = input; + } + + #[inline] + fn check_utf8(&mut self, input: &SimdInput) { + if input.is_ascii() { + self.check_incomplete_pending(); + } else { + self.check_block(input); + } + } + + #[inline] + fn check_block(&mut self, input: &SimdInput) { + // WORKAROUND + // necessary because the for loop is not unrolled on ARM64 + if input.vals.len() == 2 { + self.check_bytes(input.vals[0]); + self.check_bytes(input.vals[1]); + self.incomplete = Self::is_incomplete(input.vals[1]); + } else if input.vals.len() == 4 { + self.check_bytes(input.vals[0]); + self.check_bytes(input.vals[1]); + self.check_bytes(input.vals[2]); + self.check_bytes(input.vals[3]); + self.incomplete = Self::is_incomplete(input.vals[3]); + } else { + panic!("Unsupported number of chunks"); + } + } + + /// Validation implementation for CPUs supporting the SIMD extension (see module). + /// + /// # Errors + /// Returns the zero-sized [`basic::Utf8Error`] on failure. 
+ /// + #[inline] + pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { + let mut algorithm = Self::new(); + let mut chunks = input.chunks_exact(SIMD_CHUNK_SIZE); + for chunk in chunks.by_ref() { + let simd_input = SimdInput::::new(chunk); + if !simd_input.is_ascii() { + algorithm.check_block(&simd_input); + break; + } + } + for chunk in chunks.by_ref() { + algorithm.check_utf8(&SimdInput::::new(chunk)); + } + let rem = chunks.remainder(); + if !rem.is_empty() { + let simd_input = SimdInput::::new_partial(rem); + algorithm.check_utf8(&simd_input); + } + algorithm.check_incomplete_pending(); + if algorithm.has_error() { + Err(basic::Utf8Error {}) + } else { + Ok(()) + } + } + + #[inline] + #[expect(clippy::redundant_else)] // more readable + fn validate_utf8_compat_simd0(input: &[u8]) -> core::result::Result<(), usize> { + let mut algorithm = Self::new(); + let mut idx = 0; + let mut chunks = input.chunks_exact(SIMD_CHUNK_SIZE); + let mut only_ascii = true; + + 'outer: loop { + if only_ascii { + for chunk in chunks.by_ref() { + let simd_input = SimdInput::new(chunk); + if !simd_input.is_ascii() { + algorithm.check_block(&simd_input); + if algorithm.has_error() { + return Err(idx); + } else { + only_ascii = false; + idx += SIMD_CHUNK_SIZE; + continue 'outer; + } + } + idx += SIMD_CHUNK_SIZE; + } + } else { + for chunk in chunks.by_ref() { + let simd_input = SimdInput::new(chunk); + if simd_input.is_ascii() { + algorithm.check_incomplete_pending(); + if algorithm.has_error() { + return Err(idx); + } else { + // we are in pure ASCII territory again + only_ascii = true; + idx += SIMD_CHUNK_SIZE; + continue 'outer; + } + } else { + algorithm.check_block(&simd_input); + if algorithm.has_error() { + return Err(idx); + } + } + idx += SIMD_CHUNK_SIZE; + } + } + break; + } + let rem = chunks.remainder(); + if !rem.is_empty() { + let simd_input = SimdInput::::new_partial(rem); + algorithm.check_utf8(&simd_input); + } + 
algorithm.check_incomplete_pending(); + if algorithm.has_error() { + Err(idx) + } else { + Ok(()) + } + } +} + +/// Low-level implementation of the [`basic::imp::Utf8Validator`] trait. +/// +/// This is implementation requires CPU SIMD features specified by the module it resides in. +/// It is undefined behavior to call it if the required CPU features are not +/// available. +#[cfg(feature = "public_imp")] +pub struct Utf8ValidatorImp { + algorithm: Utf8CheckAlgorithm<16, 4>, + incomplete_data: [u8; 64], + incomplete_len: usize, +} + +#[cfg(feature = "public_imp")] +impl Utf8ValidatorImp { + #[inline] + fn update_from_incomplete_data(&mut self) { + let simd_input = SimdInput::new(&self.incomplete_data); + self.algorithm.check_utf8(&simd_input); + self.incomplete_len = 0; + } +} + +#[cfg(feature = "public_imp")] +impl basic::imp::Utf8Validator for Utf8ValidatorImp { + #[inline] + #[must_use] + fn new() -> Self { + Self { + algorithm: Utf8CheckAlgorithm::<16, 4>::new(), + incomplete_data: [0; 64], + incomplete_len: 0, + } + } + + #[inline] + fn update(&mut self, mut input: &[u8]) { + if input.is_empty() { + return; + } + if self.incomplete_len != 0 { + let to_copy = core::cmp::min(SIMD_CHUNK_SIZE - self.incomplete_len, input.len()); + self.incomplete_data[self.incomplete_len..self.incomplete_len + to_copy] + .copy_from_slice(&input[..to_copy]); + if self.incomplete_len + to_copy == SIMD_CHUNK_SIZE { + self.update_from_incomplete_data(); + input = &input[to_copy..]; + } else { + self.incomplete_len += to_copy; + return; + } + } + // no incomplete data, check chunks + let mut chunks = input.chunks_exact(SIMD_CHUNK_SIZE); + for chunk in &mut chunks { + let input = SimdInput::new(chunk); + self.algorithm.check_utf8(&input); + } + if !chunks.remainder().is_empty() { + self.incomplete_data[..chunks.remainder().len()].copy_from_slice(chunks.remainder()); + self.incomplete_len = chunks.remainder().len(); + } + } + + #[inline] + fn finalize(mut self) -> 
core::result::Result<(), basic::Utf8Error> { + if self.incomplete_len != 0 { + self.incomplete_data[self.incomplete_len..].fill(0); + self.update_from_incomplete_data(); + } + self.algorithm.check_incomplete_pending(); + if self.algorithm.has_error() { + Err(basic::Utf8Error {}) + } else { + Ok(()) + } + } +} + +/// Low-level implementation of the [`basic::imp::ChunkedUtf8Validator`] trait. +/// +/// This is implementation requires CPU SIMD features specified by the module it resides in. +/// It is undefined behavior to call it if the required CPU features are not +/// available. +#[cfg(feature = "public_imp")] +pub struct ChunkedUtf8ValidatorImp { + algorithm: Utf8CheckAlgorithm<16, 4>, +} + +#[cfg(feature = "public_imp")] +impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { + #[inline] + #[must_use] + fn new() -> Self { + Self { + algorithm: Utf8CheckAlgorithm::<16, 4>::new(), + } + } + + #[inline] + fn update_from_chunks(&mut self, input: &[u8]) { + assert!( + input.len() % SIMD_CHUNK_SIZE == 0, + "Input size must be a multiple of 64." 
+ ); + for chunk in input.chunks_exact(SIMD_CHUNK_SIZE) { + let input = SimdInput::new(chunk); + self.algorithm.check_utf8(&input); + } + } + + #[inline] + fn finalize( + mut self, + remaining_input: core::option::Option<&[u8]>, + ) -> core::result::Result<(), basic::Utf8Error> { + if let Some(remaining_input) = remaining_input { + if !remaining_input.is_empty() { + let mut chunks = remaining_input.chunks_exact(SIMD_CHUNK_SIZE); + for chunk in &mut chunks { + let input = SimdInput::new(chunk); + self.algorithm.check_utf8(&input); + } + if !chunks.remainder().is_empty() { + let simd_input = SimdInput::new_partial(chunks.remainder()); + self.algorithm.check_utf8(&simd_input); + } + } + } + self.algorithm.check_incomplete_pending(); + if self.algorithm.has_error() { + Err(basic::Utf8Error {}) + } else { + Ok(()) + } + } +} + +#[cold] +#[expect(clippy::unwrap_used)] +#[allow(dead_code)] // only used if there is a SIMD implementation +pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> crate::compat::Utf8Error { + let offset = if failing_block_pos == 0 { + // Error must be in this block since it is the first. + 0 + } else { + // The previous block is OK except for a possible continuation over the block boundary. + // We go backwards over the last three bytes of the previous block and find the + // last non-continuation byte as a starting point for an std validation. If the last + // three bytes are all continuation bytes then the previous block ends with a four byte + // UTF-8 codepoint, is thus complete and valid UTF-8. We start the check with the + // current block in that case. + (1..=3) + .find(|i| input[failing_block_pos - i] >> 6 != 0b10) + .map_or(failing_block_pos, |i| failing_block_pos - i) + }; + // UNWRAP: safe because the SIMD UTF-8 validation found an error + super::validate_utf8_at_offset(input, offset).unwrap_err() +} + +pub(crate) mod v128 { + /// Validation implementation using 128-bit SIMD. 
+ /// + /// # Errors + /// Returns the zero-sized [`basic::Utf8Error`] on failure. + #[inline] + pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), crate::basic::Utf8Error> { + super::Utf8CheckAlgorithm::<16, 4>::validate_utf8_basic(input) + } + + /// Validation implementation using 128-bit SIMD. + /// + /// # Errors + /// Returns [`compat::Utf8Error`] with detailed error information on failure. + #[inline] + pub fn validate_utf8_compat( + input: &[u8], + ) -> core::result::Result<(), crate::compat::Utf8Error> { + super::Utf8CheckAlgorithm::<16, 4>::validate_utf8_compat_simd0(input) + .map_err(|idx| super::get_compat_error(input, idx)) + } + + #[cfg(feature = "public_imp")] + pub use super::ChunkedUtf8ValidatorImp; + #[cfg(feature = "public_imp")] + pub use super::Utf8ValidatorImp; +} + +pub(crate) mod v256 { + /// Validation implementation using 256-bit SIMD. + /// + /// # Errors + /// Returns the zero-sized [`basic::Utf8Error`] on failure. + #[inline] + pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), crate::basic::Utf8Error> { + super::Utf8CheckAlgorithm::<32, 2>::validate_utf8_basic(input) + } + + /// Validation implementation using 256-bit SIMD. + /// + /// # Errors + /// Returns [`compat::Utf8Error`] with detailed error information on failure. 
+ #[inline] + pub fn validate_utf8_compat( + input: &[u8], + ) -> core::result::Result<(), crate::compat::Utf8Error> { + super::Utf8CheckAlgorithm::<32, 2>::validate_utf8_compat_simd0(input) + .map_err(|idx| super::get_compat_error(input, idx)) + } + + #[cfg(feature = "public_imp")] + pub use super::ChunkedUtf8ValidatorImp; + #[cfg(feature = "public_imp")] + pub use super::Utf8ValidatorImp; +} diff --git a/portable/src/lib.rs b/portable/src/lib.rs new file mode 100644 index 00000000..7cf85aba --- /dev/null +++ b/portable/src/lib.rs @@ -0,0 +1,104 @@ +#![warn(unused_extern_crates)] +#![warn( + clippy::all, + clippy::unwrap_used, + clippy::unnecessary_unwrap, + clippy::pedantic, + clippy::nursery +)] +#![expect(clippy::redundant_pub_crate)] // check is broken (see e.g. https://github.com/rust-lang/rust-clippy/issues/5369) +#![deny(missing_docs)] +#![cfg_attr(not(feature = "std"), no_std)] +#![feature(doc_auto_cfg)] +#![feature(portable_simd)] + +//! Blazingly fast API-compatible UTF-8 validation for Rust using SIMD extensions, based on the implementation from +//! [simdjson](https://github.com/simdjson/simdjson). Originally ported to Rust by the developers of [simd-json.rs](https://simd-json.rs), but now heavily improved. +//! +//! ## Quick start +//! Add the dependency to your Cargo.toml file: +//! ```toml +//! [dependencies] +//! simdutf8 = "0.1.5" +//! ``` +//! +//! Use [`basic::from_utf8()`] as a drop-in replacement for `std::str::from_utf8()`. +//! +//! ```rust +//! use simdutf8_portable::basic::from_utf8; +//! +//! println!("{}", from_utf8(b"I \xE2\x9D\xA4\xEF\xB8\x8F UTF-8!").unwrap()); +//! ``` +//! +//! If you need detailed information on validation failures, use [`compat::from_utf8()`] +//! instead. +//! +//! ```rust +//! use simdutf8_portable::compat::from_utf8; +//! +//! let err = from_utf8(b"I \xE2\x9D\xA4\xEF\xB8 UTF-8!").unwrap_err(); +//! assert_eq!(err.valid_up_to(), 5); +//! assert_eq!(err.error_len(), Some(2)); +//! ``` +//! +//! ## APIs +//! +//! 
### Basic flavor +//! Use the `basic` API flavor for maximum speed. It is fastest on valid UTF-8, but only checks +//! for errors after processing the whole byte sequence and does not provide detailed information if the data +//! is not valid UTF-8. [`basic::Utf8Error`] is a zero-sized error struct. +//! +//! ### Compat flavor +//! The `compat` flavor is fully API-compatible with `std::str::from_utf8()`. In particular, [`compat::from_utf8()`] +//! returns a [`compat::Utf8Error`], which has [`valid_up_to()`](compat::Utf8Error#method.valid_up_to) and +//! [`error_len()`](compat::Utf8Error#method.error_len) methods. The first is useful for verification of streamed data. The +//! second is useful e.g. for replacing invalid byte sequences with a replacement character. +//! +//! It also fails early: errors are checked on the fly as the string is processed and once +//! an invalid UTF-8 sequence is encountered, it returns without processing the rest of the data. +//! This comes at a slight performance penalty compared to the [`basic`] API even if the input is valid UTF-8. +//! +//! ## Implementation selection +//! +//! ### X86 +//! The fastest implementation is selected at runtime using the `std::is_x86_feature_detected!` macro, unless the CPU +//! targeted by the compiler supports the fastest available implementation. +//! So if you compile with `RUSTFLAGS="-C target-cpu=native"` on a recent x86-64 machine, the AVX 2 implementation is selected at +//! compile-time and runtime selection is disabled. +//! +//! For no-std support (compiled with `--no-default-features`) the implementation is always selected at compile time based on +//! the targeted CPU. Use `RUSTFLAGS="-C target-feature=+avx2"` for the AVX 2 implementation or `RUSTFLAGS="-C target-feature=+sse4.2"` +//! for the SSE 4.2 implementation. +//! +//! ### ARM64 +//! The SIMD implementation is used automatically since Rust 1.61. +//! +//! ### WASM32 +//! 
For wasm32 support, the implementation is selected at compile time based on the presence of the `simd128` target feature. +//! Use `RUSTFLAGS="-C target-feature=+simd128"` to enable the WASM SIMD implementation. WASM, at +//! the time of this writing, doesn't have a way to detect SIMD through WASM itself. Although this capability +//! is available in various WASM host environments (e.g., [wasm-feature-detect] in the web browser), there is no portable +//! way from within the library to detect this. +//! +//! [wasm-feature-detect]: https://github.com/GoogleChromeLabs/wasm-feature-detect +//! +//! ### Access to low-level functionality +//! If you want to be able to call a SIMD implementation directly, use the `public_imp` feature flag. The validation +//! implementations are then accessible via [`basic::imp`] and [`compat::imp`]. Traits facilitating streaming validation are available +//! there as well. +//! +//! ## Optimisation flags +//! Do not use [`opt-level = "z"`](https://doc.rust-lang.org/cargo/reference/profiles.html), which prevents inlining and makes +//! the code quite slow. +//! +//! ## Minimum Supported Rust Version (MSRV) +//! This crate's minimum supported Rust version is 1.38.0. +//! +//! ## Algorithm +//! +//! See Validating UTF-8 In Less Than One Instruction Per Byte, Software: Practice and Experience 51 (5), 2021 +//! <https://arxiv.org/abs/2010.03090>
+ +pub mod basic; +pub mod compat; +mod implementation; diff --git a/portable/tests/tests.rs b/portable/tests/tests.rs new file mode 100644 index 00000000..55b3e399 --- /dev/null +++ b/portable/tests/tests.rs @@ -0,0 +1,407 @@ +#![allow(clippy::non_ascii_literal)] + +use simdutf8_portable::basic::from_utf8 as basic_from_utf8; +use simdutf8_portable::basic::from_utf8_mut as basic_from_utf8_mut; +use simdutf8_portable::compat::from_utf8 as compat_from_utf8; +use simdutf8_portable::compat::from_utf8_mut as compat_from_utf8_mut; + +#[cfg(not(feature = "std"))] +extern crate std; + +#[cfg(not(feature = "std"))] +use std::{borrow::ToOwned, format}; + +pub trait BStrExt { + fn repeat_x(&self, count: usize) -> Vec; +} + +/// b"a".repeat() is not implemented for Rust 1.38.0 (MSRV) +impl BStrExt for T +where + T: AsRef<[u8]>, +{ + #[expect(clippy::unwrap_used)] + fn repeat_x(&self, count: usize) -> Vec { + use std::io::Write; + + let x = self.as_ref(); + let mut res = Vec::with_capacity(x.len() * count); + for _ in 0..count { + res.write_all(x).unwrap(); + } + res + } +} + +fn test_valid(input: &[u8]) { + // std lib sanity check + assert!(std::str::from_utf8(input).is_ok()); + + assert!(basic_from_utf8(input).is_ok()); + assert!(compat_from_utf8(input).is_ok()); + + let mut mut_input = input.to_owned(); + assert!(basic_from_utf8_mut(mut_input.as_mut_slice()).is_ok()); + assert!(compat_from_utf8_mut(mut_input.as_mut_slice()).is_ok()); + + #[cfg(feature = "public_imp")] + public_imp::test_valid(input); +} + +fn test_invalid(input: &[u8], valid_up_to: usize, error_len: Option) { + // std lib sanity check + let err = std::str::from_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + assert!(basic_from_utf8(input).is_err()); + let err = compat_from_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + #[cfg(feature = "public_imp")] + 
public_imp::test_invalid(input, valid_up_to, error_len); +} + +#[cfg(feature = "public_imp")] +mod public_imp { + + #[allow(unused_variables)] // nothing to do if not SIMD implementation is available + pub(super) fn test_valid(input: &[u8]) { + #[cfg(feature = "public_imp")] + { + assert!(simdutf8_portable::basic::imp::fallback::validate_utf8(input).is_ok()); + assert!(simdutf8_portable::compat::imp::fallback::validate_utf8(input).is_ok()); + + test_streaming::( + input, true, + ); + test_chunked_streaming::< + simdutf8_portable::basic::imp::fallback::ChunkedUtf8ValidatorImp, + >(input, true); + + assert!(simdutf8_portable::basic::imp::v128::validate_utf8(input).is_ok()); + assert!(simdutf8_portable::compat::imp::v128::validate_utf8(input).is_ok()); + + test_streaming::(input, true); + test_chunked_streaming::( + input, true, + ); + + assert!(simdutf8_portable::basic::imp::v256::validate_utf8(input).is_ok()); + assert!(simdutf8_portable::compat::imp::v256::validate_utf8(input).is_ok()); + + test_streaming::(input, true); + test_chunked_streaming::( + input, true, + ); + } + } + + #[allow(unused_variables)] // nothing to do if not SIMD implementation is available + pub(super) fn test_invalid(input: &[u8], valid_up_to: usize, error_len: Option) { + #[cfg(feature = "public_imp")] + { + assert!(simdutf8_portable::basic::imp::fallback::validate_utf8(input).is_err()); + let err = simdutf8_portable::compat::imp::fallback::validate_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + test_streaming::( + input, false, + ); + test_chunked_streaming::< + simdutf8_portable::basic::imp::fallback::ChunkedUtf8ValidatorImp, + >(input, false); + + assert!(simdutf8_portable::basic::imp::v128::validate_utf8(input).is_err()); + let err = simdutf8_portable::compat::imp::v128::validate_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + 
test_streaming::(input, false); + test_chunked_streaming::( + input, false, + ); + + assert!(simdutf8_portable::basic::imp::v256::validate_utf8(input).is_err()); + let err = simdutf8_portable::compat::imp::v256::validate_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + test_streaming::(input, false); + test_chunked_streaming::( + input, false, + ); + } + } + + #[allow(unused)] // not used if not SIMD implementation is available + fn test_streaming(input: &[u8], ok: bool) { + let mut validator = T::new(); + validator.update(input); + assert_eq!(validator.finalize().is_ok(), ok); + for i in [64, 128, 256, 1024, 65536, 1, 2, 3, 36, 99].iter() { + test_streaming_blocks::(input, *i, ok) + } + } + + #[allow(unused)] // not used if not SIMD implementation is available + fn test_streaming_blocks( + input: &[u8], + block_size: usize, + ok: bool, + ) { + let mut validator = T::new(); + for chunk in input.chunks(block_size) { + validator.update(chunk); + } + assert_eq!(validator.finalize().is_ok(), ok); + } + + #[allow(unused)] // not used if not SIMD implementation is available + fn test_chunked_streaming( + input: &[u8], + ok: bool, + ) { + for i in [64, 128, 256, 1024, 65536].iter() { + test_chunked_streaming_with_chunk_size::(input, *i, ok) + } + } + + #[allow(unused)] // not used if not SIMD implementation is available + fn test_chunked_streaming_with_chunk_size< + T: simdutf8_portable::basic::imp::ChunkedUtf8Validator, + >( + input: &[u8], + chunk_size: usize, + ok: bool, + ) { + let mut validator = T::new(); + let mut chunks = input.chunks_exact(chunk_size); + for chunk in &mut chunks { + validator.update_from_chunks(chunk); + } + assert_eq!(validator.finalize(Some(chunks.remainder())).is_ok(), ok); + } + + #[test] + #[should_panic] + fn test_neon_chunked_panic() { + test_chunked_streaming_with_chunk_size::< + simdutf8_portable::basic::imp::v128::ChunkedUtf8ValidatorImp, + >(b"abcd", 1, true); + 
test_chunked_streaming_with_chunk_size::< + simdutf8_portable::basic::imp::v256::ChunkedUtf8ValidatorImp, + >(b"abcd", 1, true); + } +} + +fn test_invalid_after_specific_prefix( + input: &[u8], + valid_up_to: usize, + error_len: Option, + with_suffix_error_len: Option, + repeat: usize, + prefix_bytes: &[u8], +) { + { + let mut prefixed_input = prefix_bytes.repeat_x(repeat); + let prefix_len = prefixed_input.len(); + prefixed_input.extend_from_slice(input); + test_invalid(prefixed_input.as_ref(), valid_up_to + prefix_len, error_len) + } + + if repeat != 0 { + let mut prefixed_input = prefix_bytes.repeat_x(repeat); + let prefix_len = prefixed_input.len(); + prefixed_input.extend_from_slice(input); + prefixed_input.extend_from_slice(prefix_bytes.repeat_x(repeat).as_slice()); + test_invalid( + prefixed_input.as_ref(), + valid_up_to + prefix_len, + with_suffix_error_len, + ) + } +} + +fn test_invalid_after_prefix( + input: &[u8], + valid_up_to: usize, + error_len: Option, + with_suffix_error_len: Option, + repeat: usize, +) { + for prefix in [ + "a", + "ö", + "😊", + "a".repeat(64).as_str(), + ("a".repeat(64) + "ö".repeat(32).as_str()).as_str(), + ] + .iter() + { + test_invalid_after_specific_prefix( + input, + valid_up_to, + error_len, + with_suffix_error_len, + repeat, + prefix.as_bytes(), + ); + } +} + +fn test_invalid_after_prefixes( + input: &[u8], + valid_up_to: usize, + error_len: Option, + with_suffix_error_len: Option, +) { + for repeat in [ + 0, 1, 2, 7, 8, 9, 15, 16, 16, 31, 32, 33, 63, 64, 65, 127, 128, 129, + ] + .iter() + { + test_invalid_after_prefix( + input, + valid_up_to, + error_len, + with_suffix_error_len, + *repeat, + ); + } +} + +#[test] +fn simple_valid() { + test_valid(b""); + + test_valid(b"\0"); + + test_valid(b"a".repeat_x(64).as_ref()); + + test_valid(b"a".repeat_x(128).as_ref()); + + test_valid(b"The quick brown fox jumps over the lazy dog"); + + // umlauts + test_valid("öäüÖÄÜß".as_bytes()); + + // emojis + 
test_valid("❤️✨🥺🔥😂😊✔️👍🥰".as_bytes()); + + // Chinese + test_valid("断用山昨屈内銀代意検瓶調像。情旗最投任留財夜隆年表高学送意功者。辺図掲記込真通第民国聞平。海帰傷芸記築世防橋整済歳権君注。選紙例並情夕破勢景移情誇進場豊読。景関有権米武野範随惑旬特覧刊野。相毎加共情面教地作減関絡。暖料児違歩致本感閉浦出楽赤何。時選権週邑針格事提一案質名投百定。止感右聞食三年外積文載者別。".as_bytes()); + + // Japanese + test_valid("意ざど禁23費サヒ車園オスミト規更ワエ異67事続トソキ音合岡治こ訪京ぴ日9稿がト明安イ抗的ウクロコ売一エコヨホ必噴塗ッ。索墓ー足議需レ応予ニ質県トぴン学市機だほせフ車捕コニ自校がこで極3力イい増娘汁表製ク。委セヤホネ作誌ミマクソ続新ほし月中報制どてびフ字78完りっせが村惹ヨサコ訳器りそ参受草ムタ大移ッけでつ番足ほこン質北ぽのよう応一ア輝労イ手人う再茨夕へしう。".as_bytes()); + + // Korean + test_valid("3인은 대법원장이 지명하는 자를 임명한다, 대통령은 제3항과 제4항의 사유를 지체없이 공포하여야 한다, 제한하는 경우에도 자유와 권리의 본질적인 내용을 침해할 수 없다, 국가는 전통문화의 계승·발전과 민족문화의 창달에 노력하여야 한다.".as_bytes()); +} + +#[test] +fn simple_invalid() { + test_invalid_after_prefixes(b"\xFF", 0, Some(1), Some(1)); + + // incomplete umlaut + test_invalid_after_prefixes(b"\xC3", 0, None, Some(1)); + + // incomplete emoji + test_invalid_after_prefixes(b"\xF0", 0, None, Some(1)); + test_invalid_after_prefixes(b"\xF0\x9F", 0, None, Some(2)); + test_invalid_after_prefixes(b"\xF0\x9F\x98", 0, None, Some(3)); +} + +#[test] +fn incomplete_on_32nd_byte() { + let mut invalid = b"a".repeat_x(31); + invalid.push(0xf0); + test_invalid(&invalid, 31, None) +} + +#[test] +fn incomplete_on_64th_byte() { + let mut invalid = b"a".repeat_x(63); + invalid.push(0xf0); + test_invalid(&invalid, 63, None) +} + +#[test] +fn incomplete_on_64th_byte_65_bytes_total() { + let mut invalid = b"a".repeat_x(63); + invalid.push(0xf0); + invalid.push(b'a'); + test_invalid(&invalid, 63, Some(1)) +} + +#[test] +fn error_display_basic() { + assert_eq!( + format!("{}", basic_from_utf8(b"\xF0").unwrap_err()), + "invalid utf-8 sequence" + ); + assert_eq!( + format!("{}", basic_from_utf8(b"a\xF0a").unwrap_err()), + "invalid utf-8 sequence" + ); +} + +#[test] +fn error_display_compat() { + assert_eq!( + format!("{}", compat_from_utf8(b"\xF0").unwrap_err()), + "incomplete utf-8 byte sequence from index 0" + ); + assert_eq!( + format!("{}", compat_from_utf8(b"a\xF0a").unwrap_err()), + "invalid utf-8 
sequence of 1 bytes from index 1" + ); + assert_eq!( + format!("{}", compat_from_utf8(b"a\xF0\x9Fa").unwrap_err()), + "invalid utf-8 sequence of 2 bytes from index 1" + ); + assert_eq!( + format!("{}", compat_from_utf8(b"a\xF0\x9F\x98a").unwrap_err()), + "invalid utf-8 sequence of 3 bytes from index 1" + ); +} + +#[test] +fn error_debug_basic() { + assert_eq!( + format!("{:?}", basic_from_utf8(b"\xF0").unwrap_err()), + "Utf8Error" + ); +} + +#[test] +fn error_debug_compat() { + assert_eq!( + format!("{:?}", compat_from_utf8(b"\xF0").unwrap_err()), + "Utf8Error { valid_up_to: 0, error_len: None }" + ); + assert_eq!( + format!("{:?}", compat_from_utf8(b"a\xF0a").unwrap_err()), + "Utf8Error { valid_up_to: 1, error_len: Some(1) }" + ); +} + +#[test] +#[expect(clippy::clone_on_copy)] // used for coverage +fn error_derives_basic() { + let err = basic_from_utf8(b"\xF0").unwrap_err(); + let err2 = err.clone(); + assert_eq!(err, err2); + assert!(!(err != err2)); +} + +#[test] +#[expect(clippy::clone_on_copy)] // used for coverage +fn error_derives_compat() { + let err = compat_from_utf8(b"\xF0").unwrap_err(); + let err2 = err.clone(); + assert_eq!(err, err2); + assert!(!(err != err2)); +}