diff --git a/Cargo.lock b/Cargo.lock index 71dc350..79ddc14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -93,6 +93,21 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ec7a15cbe22e59248fc7eadb1907dab5ba09372595da4d73dd805ed4417dfe" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cace84e55f07e7301bae1c519df89cdad8cc3cd868413d3fdbdeca9ff3db484" + [[package]] name = "criterion" version = "0.3.5" @@ -247,8 +262,11 @@ dependencies = [ name = "fastmurmur3" version = "0.1.2" dependencies = [ + "crc", "criterion", "fasthash", + "fnv", + "highway", "murmur3", "rand 0.8.4", "rustc-hash", @@ -257,6 +275,12 @@ dependencies = [ "xxhash-rust", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "fuchsia-cprng" version = "0.1.1" @@ -305,6 +329,12 @@ dependencies = [ "libc", ] +[[package]] +name = "highway" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1489f81ead4b71a09ddeab6850c0356c0932587637d753f21ee1010ab875b013" + [[package]] name = "itertools" version = "0.10.3" diff --git a/Cargo.toml b/Cargo.toml index 9b67fbb..1d5e9b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,9 +20,10 @@ keywords = [ ] [features] -# Link the C library for murmur3. You likely never need this feature. -# It exists purely for benchmarking and testing purposes. -"murmur3c"= [] +# Link the C libraries for murmur3. These features make no difference for functionality and they should be disabled. +# They exist purely for benchmarking and testing purposes. 
+"murmur3c" = [] +"fasthash" = ["dep:fasthash"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [[bench]] @@ -31,13 +32,16 @@ path = "bench/bench.rs" harness = false [dependencies] +fasthash = { version = "0.4.0", optional = true } [dev-dependencies] criterion = "0.3.5" murmur3 = "0.5.1" sha-1 = "0.10.0" rand = "0.8.4" -fasthash = "0.4.0" xxhash-rust = { version = "0.8.2", features = ["xxh3"] } twox-hash = "1.6.1" rustc-hash = "1.1.0" +highway = "0.8.1" +fnv = "1.0.7" +crc = "3.0.1" diff --git a/bench/bench.rs b/bench/bench.rs index 0ae3063..7c6720c 100644 --- a/bench/bench.rs +++ b/bench/bench.rs @@ -1,57 +1,118 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +#[cfg(feature = "fasthash")] +use fasthash::murmur3::Hash128_x64; +#[cfg(feature = "fasthash")] +use fasthash::FastHash; +use highway::HighwayHash; +use rand::RngCore; use std::hash::Hasher; use std::io::Cursor; -use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId}; -use fasthash::FastHash; -use fasthash::murmur3::Hash128_x64; -use rustc_hash; - -static SOURCE: &'static [u8] = b"The quick brown fox jumps over the lazy dog"; - fn sha1(data: &[u8]) -> [u8; 20] { - use sha1::{Sha1, Digest}; + use sha1::{Digest, Sha1}; // let mut hasher = Sha1::new(); <[u8; 20]>::from(Sha1::digest(data)) } - fn criterion_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("hashes"); - group.bench_function("sha1", |b| b.iter(|| - sha1(SOURCE) - )); - group.bench_function("fastmurmur3", |b| b.iter(|| - fastmurmur3::hash(SOURCE) - )); - group.bench_function("murmur3c", |b| b.iter(|| - fastmurmur3::murmur3c::hash(SOURCE) - )); - group.bench_function("fasthash", |b| b.iter(|| - Hash128_x64::hash(SOURCE) - )); - group.bench_function("murmur3", |b| b.iter(|| - murmur3::murmur3_x64_128(&mut Cursor::new(SOURCE),0).unwrap() - )); - group.bench_function("twox_hash::Xxh3Hash128", |b| b.iter(|| { - let mut h = 
twox_hash::xxh3::Hash128::default(); - h.write(SOURCE); - h.finish() - })); - group.bench_function("twox_hash::Xxh3Hash64", |b| b.iter(|| { - let mut h = twox_hash::xxh3::Hash64::default(); - h.write(SOURCE); - h.finish() - })); - group.bench_function("xxhash_rust::xxh3_64", |b| b.iter(|| - xxhash_rust::xxh3::xxh3_64(SOURCE) - )); - group.bench_function("xxhash_rust::xxh3_128", |b| b.iter(|| - xxhash_rust::xxh3::xxh3_128(SOURCE) - )); + for size in [ + 16, 20, 32, 40, 64, 70, 128, 130, 256, 260, 512, 520, 1024, 1030, 2048, 2050, 4096, 5500, + ] { + let mut rng = rand::thread_rng(); + let mut buf = vec![0; size]; + rng.fill_bytes(&mut buf); + + group.bench_with_input(BenchmarkId::new("sha1", size), &size, |b, _size| { + b.iter(|| sha1(&buf)) + }); + + group.bench_with_input(BenchmarkId::new("fastmurmur3", size), &size, |b, _size| { + b.iter(|| fastmurmur3::hash(&buf)) + }); + + #[cfg(feature = "murmur3c")] + group.bench_with_input(BenchmarkId::new("murmur3c", size), &size, |b, _size| { + b.iter(|| fastmurmur3::murmur3c::hash(&buf)) + }); + + #[cfg(feature = "fasthash")] + group.bench_with_input(BenchmarkId::new("fasthash", size), &size, |b, _size| { + b.iter(|| Hash128_x64::hash(&buf)) + }); + + group.bench_with_input(BenchmarkId::new("murmur3", size), &size, |b, _size| { + b.iter(|| murmur3::murmur3_x64_128(&mut Cursor::new(&buf), 0)) + }); + + group.bench_with_input( + BenchmarkId::new("twox_hash::Xxh3Hash128", size), + &size, + |b, _size| { + b.iter(|| { + let mut h = twox_hash::xxh3::Hash128::default(); + h.write(&buf); + h.finish() + }) + }, + ); + + group.bench_with_input( + BenchmarkId::new("twox_hash::Xxh3Hash64", size), + &size, + |b, _size| { + b.iter(|| { + let mut h = twox_hash::xxh3::Hash64::default(); + h.write(&buf); + h.finish() + }) + }, + ); + + group.bench_with_input( + BenchmarkId::new("xxhash_rust::xxh3_64", size), + &size, + |b, _size| b.iter(|| xxhash_rust::xxh3::xxh3_64(&buf)), + ); + + group.bench_with_input( + 
BenchmarkId::new("xxhash_rust::xxh3_128", size), + &size, + |b, _size| b.iter(|| xxhash_rust::xxh3::xxh3_128(&buf)), + ); + + group.bench_with_input( + BenchmarkId::new("highway::HighwayHasher::hash128", size), + &size, + |b, _size| b.iter(|| highway::HighwayHasher::default().hash128(&buf)), + ); + + group.bench_with_input(BenchmarkId::new("fnv", size), &size, |b, _size| { + b.iter(|| { + let mut hasher = fnv::FnvHasher::default(); + hasher.write(&buf); + hasher.finish() + }) + }); + + group.bench_with_input(BenchmarkId::new("crc32", size), &size, |b, _size| { + b.iter(|| { + let hasher = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI); + hasher.checksum(&buf) + }) + }); + + group.bench_with_input(BenchmarkId::new("crc64", size), &size, |b, _size| { + b.iter(|| { + let hasher = crc::Crc::<u64>::new(&crc::CRC_64_ECMA_182); + hasher.checksum(&buf) + }) + }); + } + group.finish(); } - criterion_group!(benches, criterion_benchmark); criterion_main!(benches); diff --git a/src/fallthrough.rs b/src/fallthrough.rs index 395ece4..97e6ef4 100644 --- a/src/fallthrough.rs +++ b/src/fallthrough.rs @@ -68,4 +68,4 @@ fn it_works() { _ => { panic!("Should not reach the default case"); }, }); assert_eq!(x, 2); -} \ No newline at end of file +} diff --git a/src/lib.rs b/src/lib.rs index f35db62..5080b3a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,13 @@ -mod murmur3rs; mod fallthrough; +mod murmur3rs; pub use murmur3rs::{hash, murmur3_x64_128}; #[cfg(feature = "murmur3c")] pub mod murmur3c; - #[cfg(test)] +#[cfg(feature = "murmur3c")] mod test { use super::*; use rand::{Rng, RngCore}; @@ -33,14 +33,13 @@ mod test { let a = murmur3rs::murmur3_x64_128(&buf, salt); let b = murmur3c::murmur3_x64_128(&buf, salt); assert_eq!( - a, b, + a, + b, "Failed after {} iterations. 
salt={} data={}", i, salt, - buf.iter() - .map(|b| format!("{:x}", b)) - .collect::<String>(), + buf.iter().map(|b| format!("{:x}", b)).collect::<String>(), ); } } -} \ No newline at end of file +} diff --git a/src/murmur3c.rs b/src/murmur3c.rs index 773ccb8..30b188f 100644 --- a/src/murmur3c.rs +++ b/src/murmur3c.rs @@ -13,7 +13,12 @@ extern "C" { pub fn hash(data: &[u8]) -> u128 { let mut buf = [0u8; 16]; unsafe { - MurmurHash3_x64_128(data.as_ptr() as *const c_void, data.len() as i32, 0, &mut buf as *mut _ as *mut c_void); + MurmurHash3_x64_128( + data.as_ptr() as *const c_void, + data.len() as i32, + 0, + &mut buf as *mut _ as *mut c_void, + ); } if cfg!(target_endian = "big") { u128::from_be_bytes(buf) @@ -22,16 +27,20 @@ pub fn hash(data: &[u8]) -> u128 { } } - #[cfg(target_pointer_width = "64")] pub fn murmur3_x64_128(data: &[u8], salt: u32) -> u128 { let mut buf = [0u8; 16]; unsafe { - MurmurHash3_x64_128(data.as_ptr() as *const c_void, data.len() as i32, salt, &mut buf as *mut _ as *mut c_void); + MurmurHash3_x64_128( + data.as_ptr() as *const c_void, + data.len() as i32, + salt, + &mut buf as *mut _ as *mut c_void, + ); } if cfg!(target_endian = "big") { u128::from_be_bytes(buf) } else { u128::from_le_bytes(buf) } -} \ No newline at end of file +} diff --git a/src/murmur3rs.rs b/src/murmur3rs.rs index 05cfa8c..a16673a 100644 --- a/src/murmur3rs.rs +++ b/src/murmur3rs.rs @@ -1,28 +1,15 @@ #![allow(unreachable_code)] -use std::ops::Shl; use crate::{ - match_fallthrough, + match_fallthrough, match_fallthrough_make_loops, match_fallthrough_make_match, match_fallthrough_reverse_branches, - match_fallthrough_make_loops, - match_fallthrough_make_match }; - +use std::ops::Shl; #[inline] pub fn hash(data: &[u8]) -> u128 { murmur3_x64_128(data, 0) } -/// This macro only prints if we're in test mode. -macro_rules! 
test_println { - ($($arg:tt)*) => { - if cfg!(test) { - println!($($arg)*) - } - } -} - - #[inline] pub fn murmur3_x64_128(data: &[u8], salt: u32) -> u128 { const C1: u64 = 0x87c3_7b91_1142_53d5; @@ -42,12 +29,10 @@ pub fn murmur3_x64_128(data: &[u8], salt: u32) -> u128 { let mut h2: u64 = salt as u64; for slice in data[..full_block_len].chunks(BLOCK_SIZE) { - let k1 = u64::from_le_bytes(unsafe {*( - slice.as_ptr() as *const [u8; HALF_BLOCK_SIZE] - )}); - let k2 = u64::from_le_bytes(unsafe {*( - slice.as_ptr().offset(HALF_BLOCK_SIZE as isize) as *const [u8; HALF_BLOCK_SIZE] - )}); + let k1 = u64::from_le_bytes(unsafe { *(slice.as_ptr() as *const [u8; HALF_BLOCK_SIZE]) }); + let k2 = u64::from_le_bytes(unsafe { + *(slice.as_ptr().add(HALF_BLOCK_SIZE) as *const [u8; HALF_BLOCK_SIZE]) + }); h1 ^= k1.wrapping_mul(C1).rotate_left(R2).wrapping_mul(C2); h1 = h1 .rotate_left(R1) @@ -107,30 +92,26 @@ pub fn murmur3_x64_128(data: &[u8], salt: u32) -> u128 { h2 = fmix64(h2); h1 = h1.wrapping_add(h2); h2 = h2.wrapping_add(h1); - u128::from_ne_bytes(unsafe {*([h1, h2].as_ptr() as *const [u8; 16])}) + u128::from_ne_bytes(unsafe { *([h1, h2].as_ptr() as *const [u8; 16]) }) } - trait XorShift { fn xor_shr(&self, shift: u32) -> Self; } - impl XorShift for u64 { fn xor_shr(&self, shift: u32) -> Self { self ^ (self >> shift) } } - fn fmix64(k: u64) -> u64 { const C1: u64 = 0xff51_afd7_ed55_8ccd; const C2: u64 = 0xc4ce_b9fe_1a85_ec53; const R: u32 = 33; - k - .xor_shr(R) + k.xor_shr(R) .wrapping_mul(C1) .xor_shr(R) .wrapping_mul(C2) .xor_shr(R) -} \ No newline at end of file +}