Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issues with using WyHash with std::hash::Hash #9

Merged
merged 6 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/target
/Cargo.lock
.vscode
.idea
6 changes: 2 additions & 4 deletions benches/rand_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,10 @@ fn wyhash_benchmark(c: &mut Criterion) {

#[cfg(feature = "randomised_wyhash")]
c.bench_function("Random Hash new", |b| {
use wyrand::RandomWyHashState;
use std::hash::BuildHasher;
use wyrand::RandomWyHashState;

b.iter(|| {
RandomWyHashState::new().build_hasher()
});
b.iter(|| RandomWyHashState::new().build_hasher());
});
}

Expand Down
258 changes: 154 additions & 104 deletions src/hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,15 @@ use self::{
///
/// assert_ne!(hasher.finish(), 5); // Should not be represented by the same value any more
/// ```
///
/// # Stability
///
/// The result is only guaranteed to match the result `wyhash` would naturally produce if `write`
/// is called a single time, followed by a call to `finish`.
///
/// Any other sequence of events (including calls to `write_u32` or similar functions) are
/// guaranteed to have consistent results between platforms and versions of this crate, but may not
/// map well to the reference implementation.
#[cfg_attr(docsrs, doc(cfg(feature = "wyhash")))]
#[derive(Clone)]
pub struct WyHash {
Expand Down Expand Up @@ -75,81 +84,115 @@ impl WyHash {

#[inline]
fn consume_bytes(&self, bytes: &[u8]) -> (u64, u64, u64) {
let (lo, hi): (u64, u64);
let length = bytes.len();
let mut seed = self.seed;

match length {
4..=16 => {
lo = (read_4_bytes(bytes) << 32) | read_4_bytes(&bytes[(length >> 3) << 2..]);
hi = (read_4_bytes(&bytes[length - 4..]) << 32)
| read_4_bytes(&bytes[length - 4 - ((length >> 3) << 2)..]);
}
1..=3 => {
lo = read_upto_3_bytes(bytes);
hi = 0;
}
0 => {
lo = 0;
hi = 0;
}
_ => {
let mut index = length;
let mut start = 0;

if is_over_48_bytes(length) {
let mut seed1 = seed;
let mut seed2 = seed;

while is_over_48_bytes(index) {
seed = wymix(
read_8_bytes(&bytes[start..]) ^ self.secret[1],
read_8_bytes(&bytes[start + 8..]) ^ seed,
);
seed1 = wymix(
read_8_bytes(&bytes[start + 16..]) ^ self.secret[2],
read_8_bytes(&bytes[start + 24..]) ^ seed1,
);
seed2 = wymix(
read_8_bytes(&bytes[start + 32..]) ^ self.secret[3],
read_8_bytes(&bytes[start + 40..]) ^ seed2,
);
index -= 48;
start += 48;
}

seed ^= seed1 ^ seed2;
}

while index > 16 {
if length <= 0 {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Super minor nit, but since len() returns a usize, there won't be anything smaller than a 0 here, so can just == 0. Not too fussed though about this one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah. Tbh, mostly had that just because all the clauses were length <=.

(0, 0, self.seed)
} else if length <= 3 {
(read_upto_3_bytes(bytes), 0, self.seed)
} else if length <= 16 {
let lo = (read_4_bytes(bytes) << 32) | read_4_bytes(&bytes[(length >> 3) << 2..]);
let hi = (read_4_bytes(&bytes[length - 4..]) << 32)
| read_4_bytes(&bytes[length - 4 - ((length >> 3) << 2)..]);
(lo, hi, self.seed)
} else {
let mut index = length;
let mut start = 0;
let mut seed = self.seed;

if is_over_48_bytes(length) {
let mut seed1 = seed;
let mut seed2 = seed;

while is_over_48_bytes(index) {
seed = wymix(
read_8_bytes(&bytes[start..]) ^ self.secret[1],
read_8_bytes(&bytes[start + 8..]) ^ seed,
);
index -= 16;
start += 16
seed1 = wymix(
read_8_bytes(&bytes[start + 16..]) ^ self.secret[2],
read_8_bytes(&bytes[start + 24..]) ^ seed1,
);
seed2 = wymix(
read_8_bytes(&bytes[start + 32..]) ^ self.secret[3],
read_8_bytes(&bytes[start + 40..]) ^ seed2,
);
index -= 48;
start += 48;
}

lo = read_8_bytes(&bytes[length - 16..]);
hi = read_8_bytes(&bytes[length - 8..]);
seed ^= seed1 ^ seed2;
}

while index > 16 {
seed = wymix(
read_8_bytes(&bytes[start..]) ^ self.secret[1],
read_8_bytes(&bytes[start + 8..]) ^ seed,
);
index -= 16;
start += 16
}

let lo = read_8_bytes(&bytes[length - 16..]);
let hi = read_8_bytes(&bytes[length - 8..]);
(lo, hi, seed)
}
}

(lo, hi, seed)
#[inline]
fn mix_current_seed(&mut self) {
if self.size != 0 {
self.seed = wymix(self.lo, self.hi ^ self.seed);
}
}
}

impl Hasher for WyHash {
#[inline]
fn write(&mut self, bytes: &[u8]) {
for chunk in bytes.chunks(u64::MAX as usize) {
let (lo, hi, seed) = self.consume_bytes(chunk);
self.mix_current_seed();

self.lo = lo;
self.hi = hi;
self.seed = seed;
self.size += chunk.len() as u64;
}
let (lo, hi, seed) = self.consume_bytes(bytes);

self.lo = lo;
self.hi = hi;
self.seed = seed;
self.size += bytes.len() as u64;
}

#[inline]
fn write_u8(&mut self, i: u8) {
self.write_u64(i as u64)
}

#[inline]
fn write_u16(&mut self, i: u16) {
self.write_u64(i as u64)
}

#[inline]
fn write_u32(&mut self, i: u32) {
self.write_u64(i as u64)
}

#[inline]
fn write_u64(&mut self, i: u64) {
self.mix_current_seed();
self.lo = i;
self.hi = 0;
self.size += 8;
}

#[inline]
fn write_u128(&mut self, i: u128) {
self.mix_current_seed();
self.lo = i as u64;
self.hi = (i >> 64) as u64;
self.size += 16;
}

#[inline]
fn write_usize(&mut self, i: usize) {
self.write_u64(i as u64);
}

#[inline]
Expand Down Expand Up @@ -180,6 +223,8 @@ mod tests {

use super::*;

use core::hash::Hash;

#[cfg(feature = "debug")]
#[test]
fn no_leaking_debug() {
Expand All @@ -195,26 +240,38 @@ mod tests {
}

#[cfg(not(feature = "v4_2"))]
#[rustfmt::skip]
const TEST_VECTORS: [(u64, &str); 8] = [
(0x0409_638e_e2bd_e459, ""),
(0xa841_2d09_1b5f_e0a9, "a"),
(0x32dd_92e4_b291_5153, "abc"),
(0x8619_1240_89a3_a16b, "message digest"),
(0x7a43_afb6_1d7f_5f40, "abcdefghijklmnopqrstuvwxyz"),
(0xff42_329b_90e5_0d58, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"),
(0xc39c_ab13_b115_aad3, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"),
(0xe44a_846b_fc65_00cd, "123456789012345678901234567890123456789012345678"),
];

#[cfg(feature = "v4_2")]
#[rustfmt::skip]
const TEST_VECTORS: [(u64, &str); 8] = [
(0x9322_8a4d_e0ee_c5a2, ""),
(0xc5ba_c3db_1787_13c4, "a"),
(0xa97f_2f7b_1d9b_3314, "abc"),
(0x786d_1f1d_f380_1df4, "message digest"),
(0xdca5_a813_8ad3_7c87, "abcdefghijklmnopqrstuvwxyz"),
(0xb9e7_34f1_17cf_af70, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"),
(0x6cc5_eab4_9a92_d617, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"),
(0xe1d4_c58d_97ba_df5e, "123456789012345678901234567890123456789012345678"),
];

#[test]
fn expected_final_v4_hasher_output() {
// Test cases generated from the C reference's test_vectors
#[rustfmt::skip]
let test_cases: [(u64, &str); 8] = [
(0x0409_638e_e2bd_e459, ""),
(0xa841_2d09_1b5f_e0a9, "a"),
(0x32dd_92e4_b291_5153, "abc"),
(0x8619_1240_89a3_a16b, "message digest"),
(0x7a43_afb6_1d7f_5f40, "abcdefghijklmnopqrstuvwxyz"),
(0xff42_329b_90e5_0d58, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"),
(0xc39c_ab13_b115_aad3, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"),
(0xe44a_846b_fc65_00cd, "123456789012345678901234567890123456789012345678")
];

test_cases
fn expected_hasher_output() {
TEST_VECTORS
.into_iter()
.enumerate()
.map(|(seed, (expected, input))| {
let mut hasher = WyHash::new_with_secret(seed as u64, [WY0, WY1, WY2, WY3]);
let mut hasher = WyHash::new_with_default_secret(seed as u64);

hasher.write(input.as_bytes());

Expand All @@ -229,38 +286,31 @@ mod tests {
});
}

#[cfg(feature = "v4_2")]
#[test]
fn expected_final_v42_hasher_output() {
// Test cases generated from the C reference's test_vectors
#[rustfmt::skip]
let test_cases: [(u64, &str); 8] = [
(0x9322_8a4d_e0ee_c5a2, ""),
(0xc5ba_c3db_1787_13c4, "a"),
(0xa97f_2f7b_1d9b_3314, "abc"),
(0x786d_1f1d_f380_1df4, "message digest"),
(0xdca5_a813_8ad3_7c87, "abcdefghijklmnopqrstuvwxyz"),
(0xb9e7_34f1_17cf_af70, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"),
(0x6cc5_eab4_9a92_d617, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"),
(0xe1d4_c58d_97ba_df5e, "123456789012345678901234567890123456789012345678")
];

test_cases
.into_iter()
.enumerate()
.map(|(seed, (expected, input))| {
let mut hasher = WyHash::new_with_secret(seed as u64, [WY0, WY1, WY2, WY3]);
fn multiple_writes_no_collision() {
let mut hasher = WyHash::new_with_default_secret(0);
hasher.write(b"abcdef");
hasher.write(b"abcdef");
let hash_a = hasher.finish();

let mut hasher = WyHash::new_with_default_secret(0);
hasher.write(b"abcdeF");
hasher.write(b"abcdef");
let hash_b = hasher.finish();

assert_ne!(hash_a, hash_b);
}

hasher.write(input.as_bytes());
#[test]
fn tuples_no_collision() {
let mut hasher = WyHash::new_with_default_secret(0);
(1000, 2000).hash(&mut hasher);
let hash_a = hasher.finish();

(input, expected, hasher.finish())
})
.for_each(|(input, expected_hash, computed_hash)| {
assert_eq!(
expected_hash, computed_hash,
"Hashed output didn't match expected for \"{}\"",
input
);
});
let mut hasher = WyHash::new_with_default_secret(0);
(1500, 2000).hash(&mut hasher);
let hash_b = hasher.finish();

assert_ne!(hash_a, hash_b);
}
}
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
mod constants;
#[cfg(feature = "wyhash")]
mod hasher;
mod wyrand;
mod utils;
mod wyrand;

#[cfg(feature = "wyhash")]
pub use hasher::*;
Expand Down