Skip to content

Commit

Permalink
Fix issues with using WyHash with std::hash::Hash (#9)
Browse files Browse the repository at this point in the history
* Ignore IntelliJ directory.

* Simplify test vector code to not duplicate the actual test.

* Remove impossible chunk condition in hasher (usize is at most 64 bits wide, so a slice's length can never exceed u64::MAX)

* Improve code style in consume_bytes to match a more typical Rust style.

* Allow multiple write commands to function.

* Add special cases for hashing integers.
  • Loading branch information
Lymia authored Apr 16, 2024
1 parent 6844aab commit e436ed7
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 109 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/target
/Cargo.lock
.vscode
.idea
6 changes: 2 additions & 4 deletions benches/rand_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,10 @@ fn wyhash_benchmark(c: &mut Criterion) {

#[cfg(feature = "randomised_wyhash")]
c.bench_function("Random Hash new", |b| {
use wyrand::RandomWyHashState;
use std::hash::BuildHasher;
use wyrand::RandomWyHashState;

b.iter(|| {
RandomWyHashState::new().build_hasher()
});
b.iter(|| RandomWyHashState::new().build_hasher());
});
}

Expand Down
258 changes: 154 additions & 104 deletions src/hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,15 @@ use self::{
///
/// assert_ne!(hasher.finish(), 5); // Should not be represented by the same value any more
/// ```
///
/// # Stability
///
/// The result is only guaranteed to match the result `wyhash` would naturally produce if `write`
/// is called a single time, followed by a call to `finish`.
///
/// Any other sequence of calls (including calls to `write_u32` or similar functions) is
/// guaranteed to produce consistent results across platforms and versions of this crate, but may
/// not match the output of the reference implementation.
#[cfg_attr(docsrs, doc(cfg(feature = "wyhash")))]
#[derive(Clone)]
pub struct WyHash {
Expand Down Expand Up @@ -75,81 +84,115 @@ impl WyHash {

#[inline]
fn consume_bytes(&self, bytes: &[u8]) -> (u64, u64, u64) {
let (lo, hi): (u64, u64);
let length = bytes.len();
let mut seed = self.seed;

match length {
4..=16 => {
lo = (read_4_bytes(bytes) << 32) | read_4_bytes(&bytes[(length >> 3) << 2..]);
hi = (read_4_bytes(&bytes[length - 4..]) << 32)
| read_4_bytes(&bytes[length - 4 - ((length >> 3) << 2)..]);
}
1..=3 => {
lo = read_upto_3_bytes(bytes);
hi = 0;
}
0 => {
lo = 0;
hi = 0;
}
_ => {
let mut index = length;
let mut start = 0;

if is_over_48_bytes(length) {
let mut seed1 = seed;
let mut seed2 = seed;

while is_over_48_bytes(index) {
seed = wymix(
read_8_bytes(&bytes[start..]) ^ self.secret[1],
read_8_bytes(&bytes[start + 8..]) ^ seed,
);
seed1 = wymix(
read_8_bytes(&bytes[start + 16..]) ^ self.secret[2],
read_8_bytes(&bytes[start + 24..]) ^ seed1,
);
seed2 = wymix(
read_8_bytes(&bytes[start + 32..]) ^ self.secret[3],
read_8_bytes(&bytes[start + 40..]) ^ seed2,
);
index -= 48;
start += 48;
}

seed ^= seed1 ^ seed2;
}

while index > 16 {
if length <= 0 {
(0, 0, self.seed)
} else if length <= 3 {
(read_upto_3_bytes(bytes), 0, self.seed)
} else if length <= 16 {
let lo = (read_4_bytes(bytes) << 32) | read_4_bytes(&bytes[(length >> 3) << 2..]);
let hi = (read_4_bytes(&bytes[length - 4..]) << 32)
| read_4_bytes(&bytes[length - 4 - ((length >> 3) << 2)..]);
(lo, hi, self.seed)
} else {
let mut index = length;
let mut start = 0;
let mut seed = self.seed;

if is_over_48_bytes(length) {
let mut seed1 = seed;
let mut seed2 = seed;

while is_over_48_bytes(index) {
seed = wymix(
read_8_bytes(&bytes[start..]) ^ self.secret[1],
read_8_bytes(&bytes[start + 8..]) ^ seed,
);
index -= 16;
start += 16
seed1 = wymix(
read_8_bytes(&bytes[start + 16..]) ^ self.secret[2],
read_8_bytes(&bytes[start + 24..]) ^ seed1,
);
seed2 = wymix(
read_8_bytes(&bytes[start + 32..]) ^ self.secret[3],
read_8_bytes(&bytes[start + 40..]) ^ seed2,
);
index -= 48;
start += 48;
}

lo = read_8_bytes(&bytes[length - 16..]);
hi = read_8_bytes(&bytes[length - 8..]);
seed ^= seed1 ^ seed2;
}

while index > 16 {
seed = wymix(
read_8_bytes(&bytes[start..]) ^ self.secret[1],
read_8_bytes(&bytes[start + 8..]) ^ seed,
);
index -= 16;
start += 16
}

let lo = read_8_bytes(&bytes[length - 16..]);
let hi = read_8_bytes(&bytes[length - 8..]);
(lo, hi, seed)
}
}

(lo, hi, seed)
/// Folds the currently staged `lo`/`hi` words into `seed`.
///
/// Does nothing while `size` is still zero, i.e. before any input has been counted. Otherwise
/// `seed` is replaced with `wymix(lo, hi ^ seed)`. The write methods call this first, before
/// they overwrite `lo` and `hi` with the newly staged input.
#[inline]
fn mix_current_seed(&mut self) {
// Only fold when some input has already been counted.
if self.size != 0 {
self.seed = wymix(self.lo, self.hi ^ self.seed);
}
}
}

impl Hasher for WyHash {
#[inline]
fn write(&mut self, bytes: &[u8]) {
for chunk in bytes.chunks(u64::MAX as usize) {
let (lo, hi, seed) = self.consume_bytes(chunk);
self.mix_current_seed();

self.lo = lo;
self.hi = hi;
self.seed = seed;
self.size += chunk.len() as u64;
}
let (lo, hi, seed) = self.consume_bytes(bytes);

self.lo = lo;
self.hi = hi;
self.seed = seed;
self.size += bytes.len() as u64;
}

/// Hashes a `u8` by widening it losslessly to `u64` and delegating to `write_u64`.
///
/// Because the value goes through `write_u64`, it is counted as 8 bytes of input.
#[inline]
fn write_u8(&mut self, i: u8) {
    let widened = u64::from(i);
    self.write_u64(widened)
}

/// Hashes a `u16` by widening it losslessly to `u64` and delegating to `write_u64`.
///
/// Because the value goes through `write_u64`, it is counted as 8 bytes of input.
#[inline]
fn write_u16(&mut self, i: u16) {
    let widened = u64::from(i);
    self.write_u64(widened)
}

/// Hashes a `u32` by widening it losslessly to `u64` and delegating to `write_u64`.
///
/// Because the value goes through `write_u64`, it is counted as 8 bytes of input.
#[inline]
fn write_u32(&mut self, i: u32) {
    let widened = u64::from(i);
    self.write_u64(widened)
}

/// Hashes a `u64` without going through the byte-slice path.
///
/// Any previously staged state is first folded into the seed. The integer is then staged as the
/// low word with a zero high word, and `size` grows by 8.
#[inline]
fn write_u64(&mut self, i: u64) {
// Must run before `lo`/`hi` are overwritten, otherwise the earlier input would be lost.
self.mix_current_seed();
self.lo = i;
self.hi = 0;
self.size += 8;
}

/// Hashes a `u128` without going through the byte-slice path.
///
/// Any previously staged state is first folded into the seed. The low 64 bits are then staged in
/// `lo`, the high 64 bits in `hi`, and `size` grows by 16.
#[inline]
fn write_u128(&mut self, i: u128) {
// Must run before `lo`/`hi` are overwritten, otherwise the earlier input would be lost.
self.mix_current_seed();
// Truncating cast keeps the low 64 bits.
self.lo = i as u64;
// Shift the upper half down before truncating.
self.hi = (i >> 64) as u64;
self.size += 16;
}

/// Hashes a `usize` by casting it to `u64` and delegating to `write_u64`.
///
/// Because the value goes through `write_u64`, it is counted as 8 bytes of input.
#[inline]
fn write_usize(&mut self, i: usize) {
self.write_u64(i as u64);
}

#[inline]
Expand Down Expand Up @@ -180,6 +223,8 @@ mod tests {

use super::*;

use core::hash::Hash;

#[cfg(feature = "debug")]
#[test]
fn no_leaking_debug() {
Expand All @@ -195,26 +240,38 @@ mod tests {
}

// Expected `(hash, input)` pairs used when the `v4_2` feature is disabled.
// Test cases generated from the C reference's test_vectors.
// The test that consumes this table uses each entry's index as the hasher seed.
#[cfg(not(feature = "v4_2"))]
#[rustfmt::skip]
const TEST_VECTORS: [(u64, &str); 8] = [
(0x0409_638e_e2bd_e459, ""),
(0xa841_2d09_1b5f_e0a9, "a"),
(0x32dd_92e4_b291_5153, "abc"),
(0x8619_1240_89a3_a16b, "message digest"),
(0x7a43_afb6_1d7f_5f40, "abcdefghijklmnopqrstuvwxyz"),
(0xff42_329b_90e5_0d58, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"),
(0xc39c_ab13_b115_aad3, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"),
(0xe44a_846b_fc65_00cd, "123456789012345678901234567890123456789012345678"),
];

// Expected `(hash, input)` pairs used when the `v4_2` feature is enabled.
// Test cases generated from the C reference's test_vectors.
// The test that consumes this table uses each entry's index as the hasher seed.
#[cfg(feature = "v4_2")]
#[rustfmt::skip]
const TEST_VECTORS: [(u64, &str); 8] = [
(0x9322_8a4d_e0ee_c5a2, ""),
(0xc5ba_c3db_1787_13c4, "a"),
(0xa97f_2f7b_1d9b_3314, "abc"),
(0x786d_1f1d_f380_1df4, "message digest"),
(0xdca5_a813_8ad3_7c87, "abcdefghijklmnopqrstuvwxyz"),
(0xb9e7_34f1_17cf_af70, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"),
(0x6cc5_eab4_9a92_d617, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"),
(0xe1d4_c58d_97ba_df5e, "123456789012345678901234567890123456789012345678"),
];

#[test]
fn expected_final_v4_hasher_output() {
// Test cases generated from the C reference's test_vectors
#[rustfmt::skip]
let test_cases: [(u64, &str); 8] = [
(0x0409_638e_e2bd_e459, ""),
(0xa841_2d09_1b5f_e0a9, "a"),
(0x32dd_92e4_b291_5153, "abc"),
(0x8619_1240_89a3_a16b, "message digest"),
(0x7a43_afb6_1d7f_5f40, "abcdefghijklmnopqrstuvwxyz"),
(0xff42_329b_90e5_0d58, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"),
(0xc39c_ab13_b115_aad3, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"),
(0xe44a_846b_fc65_00cd, "123456789012345678901234567890123456789012345678")
];

test_cases
fn expected_hasher_output() {
TEST_VECTORS
.into_iter()
.enumerate()
.map(|(seed, (expected, input))| {
let mut hasher = WyHash::new_with_secret(seed as u64, [WY0, WY1, WY2, WY3]);
let mut hasher = WyHash::new_with_default_secret(seed as u64);

hasher.write(input.as_bytes());

Expand All @@ -229,38 +286,31 @@ mod tests {
});
}

#[cfg(feature = "v4_2")]
#[test]
fn expected_final_v42_hasher_output() {
// Test cases generated from the C reference's test_vectors
#[rustfmt::skip]
let test_cases: [(u64, &str); 8] = [
(0x9322_8a4d_e0ee_c5a2, ""),
(0xc5ba_c3db_1787_13c4, "a"),
(0xa97f_2f7b_1d9b_3314, "abc"),
(0x786d_1f1d_f380_1df4, "message digest"),
(0xdca5_a813_8ad3_7c87, "abcdefghijklmnopqrstuvwxyz"),
(0xb9e7_34f1_17cf_af70, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"),
(0x6cc5_eab4_9a92_d617, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"),
(0xe1d4_c58d_97ba_df5e, "123456789012345678901234567890123456789012345678")
];

test_cases
.into_iter()
.enumerate()
.map(|(seed, (expected, input))| {
let mut hasher = WyHash::new_with_secret(seed as u64, [WY0, WY1, WY2, WY3]);
fn multiple_writes_no_collision() {
let mut hasher = WyHash::new_with_default_secret(0);
hasher.write(b"abcdef");
hasher.write(b"abcdef");
let hash_a = hasher.finish();

let mut hasher = WyHash::new_with_default_secret(0);
hasher.write(b"abcdeF");
hasher.write(b"abcdef");
let hash_b = hasher.finish();

assert_ne!(hash_a, hash_b);
}

hasher.write(input.as_bytes());
#[test]
fn tuples_no_collision() {
let mut hasher = WyHash::new_with_default_secret(0);
(1000, 2000).hash(&mut hasher);
let hash_a = hasher.finish();

(input, expected, hasher.finish())
})
.for_each(|(input, expected_hash, computed_hash)| {
assert_eq!(
expected_hash, computed_hash,
"Hashed output didn't match expected for \"{}\"",
input
);
});
let mut hasher = WyHash::new_with_default_secret(0);
(1500, 2000).hash(&mut hasher);
let hash_b = hasher.finish();

assert_ne!(hash_a, hash_b);
}
}
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
mod constants;
#[cfg(feature = "wyhash")]
mod hasher;
mod wyrand;
mod utils;
mod wyrand;

#[cfg(feature = "wyhash")]
pub use hasher::*;
Expand Down

0 comments on commit e436ed7

Please sign in to comment.