rusticstuff · hkratz · Oct 20, 2024 · Oct 20, 2024 · Oct 20, 2024 · Oct 20, 2024
diff --git a/bench/Cargo.lock b/bench/Cargo.lock
diff --git a/bench/Cargo.toml b/bench/Cargo.toml
@@ -30,6 +30,7 @@ simdutf8_wasmtime = ["wasmtime"]
 core_affinity = "0.8.1"
 criterion = "0.5.1"
 simdutf8 = { version = "*", path = "..", features = ["aarch64_neon"] }
+simdutf8-portable = { version = "*", path = "../portable" }
 simdjson-utf8 = { version = "*", path = "simdjson-utf8", optional = true }
 # default is cranelift which is not as performant as the llvm backend
 wasmer = { version = "2.1", optional = true, default-features = false }
@@ -47,6 +48,14 @@ harness = false
 name = "throughput_compat"
 harness = false
 
+[[bench]]
+name = "throughput_basic_portable"
+harness = false
+
+[[bench]]
+name = "throughput_compat_portable"
+harness = false
+
 [[bench]]
 name = "throughput_std"
 harness = false

diff --git a/bench/benches/throughput_basic_portable.rs b/bench/benches/throughput_basic_portable.rs
@@ -0,0 +1,3 @@
+use simdutf8_bench::define_throughput_benchmark;
+
+define_throughput_benchmark!(BenchFn::BasicPortable);
diff --git a/bench/benches/throughput_compat_portable.rs b/bench/benches/throughput_compat_portable.rs
@@ -0,0 +1,3 @@
+use simdutf8_bench::define_throughput_benchmark;
+
+define_throughput_benchmark!(BenchFn::CompatPortable);
diff --git a/bench/src/lib.rs b/bench/src/lib.rs
@@ -1,6 +1,8 @@
 use criterion::{measurement::Measurement, BenchmarkGroup, BenchmarkId, Criterion, Throughput};
 use simdutf8::basic::from_utf8 as basic_from_utf8;
 use simdutf8::compat::from_utf8 as compat_from_utf8;
+use simdutf8_portable::basic::from_utf8 as basic_from_utf8_portable;
+use simdutf8_portable::compat::from_utf8 as compat_from_utf8_portable;
 
 use std::str::from_utf8 as std_from_utf8;
 
@@ -29,6 +31,8 @@ pub enum BenchFn {
     Basic,
     BasicNoInline,
     Compat,
+    BasicPortable,
+    CompatPortable,
     Std,
 
     #[cfg(feature = "simdjson")]
@@ -134,11 +138,12 @@ fn get_valid_slice_of_len_or_more_aligned(
 fn bench<M: Measurement>(c: &mut Criterion<M>, name: &str, bytes: &[u8], bench_fn: BenchFn) {
     let mut group = c.benchmark_group(name);
     for i in [1, 8, 64, 512, 4096, 65536, 131072].iter() {
+        let i = i + 33;
         let alignment = Alignment {
             boundary: 64,
             offset: 8, // 8 is the default alignment on 64-bit, so this is what can be expected worst-case
         };
-        let (vec, offset) = get_valid_slice_of_len_or_more_aligned(bytes, *i, alignment);
+        let (vec, offset) = get_valid_slice_of_len_or_more_aligned(bytes, i, alignment);
         let slice = &vec[offset..];
         assert_eq!(
             (slice.as_ptr() as usize) % alignment.boundary,
@@ -192,6 +197,24 @@ fn bench_input<M: Measurement>(
                 },
             );
         }
+        BenchFn::BasicPortable => {
+            group.bench_with_input(
+                BenchmarkId::from_parameter(format!("{:06}", input.len())),
+                &input,
+                |b, &slice| {
+                    b.iter(|| assert_eq!(basic_from_utf8_portable(slice).is_ok(), expected_ok));
+                },
+            );
+        }
+        BenchFn::CompatPortable => {
+            group.bench_with_input(
+                BenchmarkId::from_parameter(format!("{:06}", input.len())),
+                &input,
+                |b, &slice| {
+                    b.iter(|| assert_eq!(compat_from_utf8_portable(slice).is_ok(), expected_ok));
+                },
+            );
+        }
         BenchFn::Std => {
             group.bench_with_input(
                 BenchmarkId::from_parameter(format!("{:06}", input.len())),

diff --git a/nightly_workspace/.gitignore b/nightly_workspace/.gitignore
@@ -0,0 +1,2 @@
+/Cargo.lock
+/target
diff --git a/nightly_workspace/Cargo.toml b/nightly_workspace/Cargo.toml
@@ -0,0 +1,6 @@
+[workspace]
+members = [
+    "simdutf8",
+    "simdutf8/portable",
+    "simdutf8/bench"
+]
diff --git a/nightly_workspace/simdutf8 b/nightly_workspace/simdutf8
@@ -0,0 +1 @@
+..
diff --git a/portable/.gitignore b/portable/.gitignore
@@ -0,0 +1,6 @@
+/target
+/.vscode
+/.idea
+/.zed
+/.cargo
+/Cargo.lock
diff --git a/portable/.prettierrc.toml b/portable/.prettierrc.toml
@@ -0,0 +1,2 @@
+proseWrap = "always"
+printWidth = 100
diff --git a/portable/Cargo.toml b/portable/Cargo.toml
@@ -0,0 +1,40 @@
+[package]
+name = "simdutf8-portable"
+version = "0.0.1"
+authors = ["Hans Kratz <[email protected]>"]
+edition = "2024"
+description = "SIMD-accelerated UTF-8 validation using core::simd (experimental)"
+documentation = "https://docs.rs/simdutf8-portable/"
+homepage = "https://github.com/rusticstuff/simdutf8/tree/main/portable"
+repository = "https://github.com/rusticstuff/simdutf8"
+readme = "README.md"
+keywords = ["utf-8", "unicode", "string", "validation", "simd"]
+categories = ["encoding", "algorithms", "no-std"]
+license = "MIT OR Apache-2.0"
+
+[features]
+default = ["std"]
+
+std = []
+
+# expose SIMD implementations in basic::imp::* and compat::imp::*
+public_imp = []
+
+# features to force a certain implementation. Features earlier in the list take
+# precedence.
+
+# force non-SIMD fallback implementation (for testing)
+force_fallback = []
+# force 128-bit/256-bit SIMD implementation.
+# CAUTION: this can be slower than even the fallback implementation unless all SIMD
+# functions have a fast implementation; in particular, `swizzle_dyn` needs to be fast.
+force_simd128 = []
+force_simd256 = []
+
+[package.metadata.docs.rs]
+features = ["public_imp"]
+rustdoc-args = ["--cfg", "docsrs"]
+default-target = "x86_64-unknown-linux-gnu"
+
+[dependencies]
+cfg-if = "1.0.0"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		use simdutf8_bench::define_throughput_benchmark;

		define_throughput_benchmark!(BenchFn::BasicPortable);
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		use simdutf8_bench::define_throughput_benchmark;

		define_throughput_benchmark!(BenchFn::CompatPortable);