From 8fa194d7e4350ea8f5255424a44b734b25b091cc Mon Sep 17 00:00:00 2001 From: Lymia Kanokawa Date: Mon, 15 Apr 2024 17:32:22 -0700 Subject: [PATCH 1/6] Ignore IntelliJ directory. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5b70a89..975b01a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target /Cargo.lock .vscode +.idea From 448292483fd71fa87d8b3799d78d1c5d99b058da Mon Sep 17 00:00:00 2001 From: Lymia Kanokawa Date: Mon, 15 Apr 2024 17:33:05 -0700 Subject: [PATCH 2/6] Simplify test vector code to not duplicate the actual test. --- benches/rand_bench.rs | 6 ++-- src/hasher.rs | 73 +++++++++++++++---------------------------- src/lib.rs | 2 +- 3 files changed, 28 insertions(+), 53 deletions(-) diff --git a/benches/rand_bench.rs b/benches/rand_bench.rs index 45aada7..a290746 100644 --- a/benches/rand_bench.rs +++ b/benches/rand_bench.rs @@ -77,12 +77,10 @@ fn wyhash_benchmark(c: &mut Criterion) { #[cfg(feature = "randomised_wyhash")] c.bench_function("Random Hash new", |b| { - use wyrand::RandomWyHashState; use std::hash::BuildHasher; + use wyrand::RandomWyHashState; - b.iter(|| { - RandomWyHashState::new().build_hasher() - }); + b.iter(|| RandomWyHashState::new().build_hasher()); }); } diff --git a/src/hasher.rs b/src/hasher.rs index 3798837..03d2562 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -195,57 +195,34 @@ mod tests { } #[cfg(not(feature = "v4_2"))] - #[test] - fn expected_final_v4_hasher_output() { - // Test cases generated from the C reference's test_vectors - #[rustfmt::skip] - let test_cases: [(u64, &str); 8] = [ - (0x0409_638e_e2bd_e459, ""), - (0xa841_2d09_1b5f_e0a9, "a"), - (0x32dd_92e4_b291_5153, "abc"), - (0x8619_1240_89a3_a16b, "message digest"), - (0x7a43_afb6_1d7f_5f40, "abcdefghijklmnopqrstuvwxyz"), - (0xff42_329b_90e5_0d58, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), - (0xc39c_ab13_b115_aad3, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"), - (0xe44a_846b_fc65_00cd, "123456789012345678901234567890123456789012345678") - ]; - - test_cases - .into_iter() - .enumerate() - .map(|(seed, (expected, input))| { - let mut hasher = WyHash::new_with_secret(seed as u64, [WY0, WY1, WY2, WY3]); - - hasher.write(input.as_bytes()); - - (input, expected, hasher.finish()) - }) - .for_each(|(input, expected_hash, computed_hash)| { - assert_eq!( - expected_hash, computed_hash, - "Hashed output didn't match expected for \"{}\"", - input - ); - }); - } + #[rustfmt::skip] + const TEST_VECTORS: [(u64, &str); 8] = [ + (0x0409_638e_e2bd_e459, ""), + (0xa841_2d09_1b5f_e0a9, "a"), + (0x32dd_92e4_b291_5153, "abc"), + (0x8619_1240_89a3_a16b, "message digest"), + (0x7a43_afb6_1d7f_5f40, "abcdefghijklmnopqrstuvwxyz"), + (0xff42_329b_90e5_0d58, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), + (0xc39c_ab13_b115_aad3, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"), + (0xe44a_846b_fc65_00cd, "123456789012345678901234567890123456789012345678"), + ]; #[cfg(feature = "v4_2")] + #[rustfmt::skip] + const TEST_VECTORS: [(u64, &str); 8] = [ + (0x9322_8a4d_e0ee_c5a2, ""), + (0xc5ba_c3db_1787_13c4, "a"), + (0xa97f_2f7b_1d9b_3314, "abc"), + (0x786d_1f1d_f380_1df4, "message digest"), + (0xdca5_a813_8ad3_7c87, "abcdefghijklmnopqrstuvwxyz"), + (0xb9e7_34f1_17cf_af70, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), + (0x6cc5_eab4_9a92_d617, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"), + (0xe1d4_c58d_97ba_df5e, "123456789012345678901234567890123456789012345678"), + ]; + #[test] - fn expected_final_v42_hasher_output() { - // Test cases generated from the C reference's test_vectors - #[rustfmt::skip] - let test_cases: [(u64, &str); 8] = [ - (0x9322_8a4d_e0ee_c5a2, ""), - (0xc5ba_c3db_1787_13c4, "a"), - (0xa97f_2f7b_1d9b_3314, "abc"), - (0x786d_1f1d_f380_1df4, "message digest"), - (0xdca5_a813_8ad3_7c87, "abcdefghijklmnopqrstuvwxyz"), - (0xb9e7_34f1_17cf_af70, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), - (0x6cc5_eab4_9a92_d617, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"), - (0xe1d4_c58d_97ba_df5e, "123456789012345678901234567890123456789012345678") - ]; - - test_cases + fn expected_hasher_output() { + TEST_VECTORS .into_iter() .enumerate() .map(|(seed, (expected, input))| { diff --git a/src/lib.rs b/src/lib.rs index 43ac0d8..c5d48bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,8 +8,8 @@ mod constants; #[cfg(feature = "wyhash")] mod hasher; -mod wyrand; mod utils; +mod wyrand; #[cfg(feature = "wyhash")] pub use hasher::*; From d29c0ac3203a3f50e6e4dfdfc638b47ac7fd193f Mon Sep 17 00:00:00 2001 From: Lymia Kanokawa Date: Mon, 15 Apr 2024 17:35:23 -0700 Subject: [PATCH 3/6] Remove impossible chunk condition in hasher (usize is 64-bit at max, and u64::MAX will never exceed it) --- src/hasher.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/hasher.rs b/src/hasher.rs index 03d2562..2068288 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -142,14 +142,12 @@ impl WyHash { impl Hasher for WyHash { #[inline] fn write(&mut self, bytes: &[u8]) { - for chunk in bytes.chunks(u64::MAX as usize) { - let (lo, hi, seed) = self.consume_bytes(chunk); + let (lo, hi, seed) = self.consume_bytes(bytes); - self.lo = lo; - self.hi = hi; - self.seed = seed; - self.size += chunk.len() as u64; - } + self.lo = lo; + self.hi = hi; + self.seed = seed; + self.size += bytes.len() as u64; } #[inline] From 7529629763df83bea820ba48293fe9012e50617d Mon Sep 17 00:00:00 2001 From: Lymia Kanokawa Date: Mon, 15 Apr 2024 17:55:50 -0700 Subject: [PATCH 4/6] Improve code style in consume_bytes to match a more typical Rust style. --- src/hasher.rs | 95 +++++++++++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 52 deletions(-) diff --git a/src/hasher.rs b/src/hasher.rs index 2068288..f662c49 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -75,67 +75,58 @@ impl WyHash { #[inline] fn consume_bytes(&self, bytes: &[u8]) -> (u64, u64, u64) { - let (lo, hi): (u64, u64); let length = bytes.len(); - let mut seed = self.seed; - - match length { - 4..=16 => { - lo = (read_4_bytes(bytes) << 32) | read_4_bytes(&bytes[(length >> 3) << 2..]); - hi = (read_4_bytes(&bytes[length - 4..]) << 32) - | read_4_bytes(&bytes[length - 4 - ((length >> 3) << 2)..]); - } - 1..=3 => { - lo = read_upto_3_bytes(bytes); - hi = 0; - } - 0 => { - lo = 0; - hi = 0; - } - _ => { - let mut index = length; - let mut start = 0; - - if is_over_48_bytes(length) { - let mut seed1 = seed; - let mut seed2 = seed; - - while is_over_48_bytes(index) { - seed = wymix( - read_8_bytes(&bytes[start..]) ^ self.secret[1], - read_8_bytes(&bytes[start + 8..]) ^ seed, - ); - seed1 = wymix( - read_8_bytes(&bytes[start + 16..]) ^ self.secret[2], - read_8_bytes(&bytes[start + 24..]) ^ seed1, - ); - seed2 = wymix( - read_8_bytes(&bytes[start + 32..]) ^ self.secret[3], - read_8_bytes(&bytes[start + 40..]) ^ seed2, - ); - index -= 48; - start += 48; - } - - seed ^= seed1 ^ seed2; - } - - while index > 16 { + if length <= 0 { + (0, 0, self.seed) + } else if length <= 3 { + (read_upto_3_bytes(bytes), 0, self.seed) + } else if length <= 16 { + let lo = (read_4_bytes(bytes) << 32) | read_4_bytes(&bytes[(length >> 3) << 2..]); + let hi = (read_4_bytes(&bytes[length - 4..]) << 32) + | read_4_bytes(&bytes[length - 4 - ((length >> 3) << 2)..]); + (lo, hi, self.seed) + } else { + let mut index = length; + let mut start = 0; + let mut seed = self.seed; + + if is_over_48_bytes(length) { + let mut seed1 = seed; + let mut seed2 = seed; + + while is_over_48_bytes(index) { seed = wymix( read_8_bytes(&bytes[start..]) ^ self.secret[1], read_8_bytes(&bytes[start + 8..]) ^ seed, ); - index -= 16; - start += 16 + seed1 = wymix( + read_8_bytes(&bytes[start + 16..]) ^ self.secret[2], + read_8_bytes(&bytes[start + 24..]) ^ seed1, + ); + seed2 = wymix( + read_8_bytes(&bytes[start + 32..]) ^ self.secret[3], + read_8_bytes(&bytes[start + 40..]) ^ seed2, + ); + index -= 48; + start += 48; } - lo = read_8_bytes(&bytes[length - 16..]); - hi = read_8_bytes(&bytes[length - 8..]); + seed ^= seed1 ^ seed2; + } + + while index > 16 { + seed = wymix( + read_8_bytes(&bytes[start..]) ^ self.secret[1], + read_8_bytes(&bytes[start + 8..]) ^ seed, + ); + index -= 16; + start += 16 } - } - (lo, hi, seed) + let lo = read_8_bytes(&bytes[length - 16..]); + let hi = read_8_bytes(&bytes[length - 8..]); + (lo, hi, seed) + } } } From 20e5f67a14966be6eec1e43c979c42feeb99bb0d Mon Sep 17 00:00:00 2001 From: Lymia Kanokawa Date: Mon, 15 Apr 2024 22:40:52 -0700 Subject: [PATCH 5/6] Allow multiple write commands to function. --- src/hasher.rs | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/hasher.rs b/src/hasher.rs index f662c49..6ada99c 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -38,6 +38,15 @@ use self::{ /// /// assert_ne!(hasher.finish(), 5); // Should not be represented by the same value any more /// ``` +/// +/// # Stability +/// +/// The result is only guaranteed to match the result `wyhash` would naturally produce if `write` +/// is called a single time, followed by a call to `finish`. +/// +/// Any other sequence of events (including calls to `write_u32` or similar functions) are +/// guaranteed to have consistent results between platforms and versions of this crate, but may not +/// map well to the reference implementation. #[cfg_attr(docsrs, doc(cfg(feature = "wyhash")))] #[derive(Clone)] pub struct WyHash { @@ -128,11 +137,20 @@ impl WyHash { (lo, hi, seed) } } + + #[inline] + fn check_has_seed(&mut self) { + if self.size != 0 { + self.seed = wymix(self.lo, self.hi ^ self.seed); + } + } } impl Hasher for WyHash { #[inline] fn write(&mut self, bytes: &[u8]) { + self.check_has_seed(); + let (lo, hi, seed) = self.consume_bytes(bytes); self.lo = lo; @@ -215,7 +233,7 @@ mod tests { .into_iter() .enumerate() .map(|(seed, (expected, input))| { - let mut hasher = WyHash::new_with_secret(seed as u64, [WY0, WY1, WY2, WY3]); + let mut hasher = WyHash::new_with_default_secret(seed as u64); hasher.write(input.as_bytes()); @@ -229,4 +247,19 @@ mod tests { ); }); } + + #[test] + fn multiple_writes_are_effective() { + let mut hasher = WyHash::new_with_default_secret(0); + hasher.write(b"abcdef"); + hasher.write(b"abcdef"); + let hash_a = hasher.finish(); + + let mut hasher = WyHash::new_with_default_secret(0); + hasher.write(b"abcdeF"); + hasher.write(b"abcdef"); + let hash_b = hasher.finish(); + + assert_ne!(hash_a, hash_b); + } } From d1ff807e9c3fdb679e3179e58e58aab1a282c551 Mon Sep 17 00:00:00 2001 From: Lymia Kanokawa Date: Mon, 15 Apr 2024 22:53:08 -0700 Subject: [PATCH 6/6] Add special cases for hashing integers. --- src/hasher.rs | 57 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/src/hasher.rs b/src/hasher.rs index 6ada99c..0bd723d 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -139,7 +139,7 @@ impl WyHash { } #[inline] - fn check_has_seed(&mut self) { + fn mix_current_seed(&mut self) { if self.size != 0 { self.seed = wymix(self.lo, self.hi ^ self.seed); } @@ -149,7 +149,7 @@ impl WyHash { impl Hasher for WyHash { #[inline] fn write(&mut self, bytes: &[u8]) { - self.check_has_seed(); + self.mix_current_seed(); let (lo, hi, seed) = self.consume_bytes(bytes); @@ -159,6 +159,42 @@ impl Hasher for WyHash { self.size += bytes.len() as u64; } + #[inline] + fn write_u8(&mut self, i: u8) { + self.write_u64(i as u64) + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.write_u64(i as u64) + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.write_u64(i as u64) + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.mix_current_seed(); + self.lo = i; + self.hi = 0; + self.size += 8; + } + + #[inline] + fn write_u128(&mut self, i: u128) { + self.mix_current_seed(); + self.lo = i as u64; + self.hi = (i >> 64) as u64; + self.size += 16; + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.write_u64(i as u64); + } + #[inline] fn finish(&self) -> u64 { let (lo, hi) = wymul(self.lo ^ self.secret[1], self.hi ^ self.seed); @@ -187,6 +223,8 @@ mod tests { use super::*; + use core::hash::Hash; + #[cfg(feature = "debug")] #[test] fn no_leaking_debug() { @@ -249,7 +287,7 @@ mod tests { } #[test] - fn multiple_writes_are_effective() { + fn multiple_writes_no_collision() { let mut hasher = WyHash::new_with_default_secret(0); hasher.write(b"abcdef"); hasher.write(b"abcdef"); @@ -262,4 +300,17 @@ mod tests { assert_ne!(hash_a, hash_b); } + + #[test] + fn tuples_no_collision() { + let mut hasher = WyHash::new_with_default_secret(0); + (1000, 2000).hash(&mut hasher); + let hash_a = hasher.finish(); + + let mut hasher = WyHash::new_with_default_secret(0); + (1500, 2000).hash(&mut hasher); + let hash_b = hasher.finish(); + + assert_ne!(hash_a, hash_b); + } }