From e61bc42dfba6daa43dda27d90d4b7ddb51bfba34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gon=C3=A7alo=20Rica=20Pais=20da=20Silva?= Date: Thu, 29 Feb 2024 18:02:25 +0100 Subject: [PATCH 1/2] feat: Randomised WyHash builder --- .github/workflows/ci.yml | 2 +- Cargo.toml | 6 +- README.md | 10 ++++ src/hasher.rs | 19 ++++++- src/hasher/builder.rs | 115 +++++++++++++++++++++++++++++++++++++++ src/hasher/read.rs | 9 +-- src/lib.rs | 3 +- src/rand.rs | 6 +- 8 files changed, 154 insertions(+), 16 deletions(-) create mode 100644 src/hasher/builder.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 069deb3..6c49210 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,7 +41,7 @@ jobs: - name: Full feature + v4.2 testing run: cargo test --all-features - name: v4 compatibility testing - run: cargo test --no-default-features --features debug,rand_core,wyhash + run: cargo test --no-default-features --features debug,rand_core,wyhash,randomised_wyhash msrv: runs-on: ubuntu-latest diff --git a/Cargo.toml b/Cargo.toml index 1b00356..6543ff7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,12 @@ [package] name = "wyrand" -version = "0.1.3" +version = "0.1.4" edition = "2021" authors = ["Gonçalo Rica Pais da Silva "] description = "A fast & portable non-cryptographic pseudorandom number generator and hashing algorithm" license = "Apache-2.0 OR MIT" repository = "https://github.com/Bluefinger/wyrand-rs" -keywords = ["fast", "rand", "random", "wyrand"] +keywords = ["fast", "random", "wyrand", "hash", "wyhash"] categories = ["algorithms", "no-std"] exclude = ["/.*"] resolver = "2" @@ -19,9 +19,11 @@ hash = [] rand_core = ["dep:rand_core"] serde1 = ["dep:serde"] wyhash = [] +randomised_wyhash = ["wyhash", "dep:getrandom"] v4_2 = [] [dependencies] +getrandom = { version = "0.2", optional = true } rand_core = { version = "0.6", default-features = false, optional = true } serde = { version = "1.0", features = ["derive"], optional = true } diff 
--git a/README.md b/README.md index a014d77..8f2f535 100644 --- a/README.md +++ b/README.md @@ -35,8 +35,18 @@ The crate will always export `WyRand` and will do so when set as `default-featur - **`serde1`** - Enables `Serialize` and `Deserialize` derives on `WyRand`. - **`hash`** - Enables `core::hash::Hash` implementation for `WyRand`. - **`wyhash`** - Enables `WyHash`, a fast & portable hashing algorithm. Based on the final v4 C implementation. +- **`randomised_wyhash`** - Enables `RandomWyHashState`, a means to source a randomised state for `WyHash` for use in collections like `HashMap`/`HashSet`. Enables `wyhash` feature if it is not already enabled. - **`v4_2`** - Switches the PRNG/Hashing algorithms to use the final v4.2 implementation. +## Building for WASM/Web + +If you are using `WyRand` with `rand_core` and/or `WyHash` with `randomised_wyhash`, then when building for the web/WASM you'll need to configure `getrandom` to use the browser APIs as its entropy source. Add the following to your project `Cargo.toml` if your WASM builds target the web: + +```toml +[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies] +getrandom = { version = "0.2", features = ["js"] } +``` + ## License Licensed under either of diff --git a/src/hasher.rs b/src/hasher.rs index 007c01c..53efae6 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "randomised_wyhash")] +mod builder; #[cfg(feature = "v4_2")] mod primes; mod read; @@ -5,6 +7,9 @@ mod secret; use core::hash::Hasher; +#[cfg(feature = "randomised_wyhash")] +pub use builder::RandomWyHashState; + #[cfg(feature = "debug")] use core::fmt::Debug; @@ -18,9 +23,21 @@ use self::{ secret::make_secret, }; -/// WyHash hasher, a fast & portable hashing algorithm. This implementation is +/// The WyHash hasher, a fast & portable hashing algorithm. 
This implementation is /// based on the final v4 C reference implementation, as that is compatible with /// the constants used for the current `WyRand` implementation. +/// +/// ``` +/// use wyrand::WyHash; +/// use core::hash::Hasher; +/// +/// let mut hasher = WyHash::default(); +/// +/// hasher.write_u64(5); +/// +/// assert_ne!(hasher.finish(), 5); // Should not be represented by the same value any more +/// ``` +#[cfg_attr(docsrs, doc(cfg(feature = "wyhash")))] #[derive(Clone)] pub struct WyHash { seed: u64, diff --git a/src/hasher/builder.rs b/src/hasher/builder.rs new file mode 100644 index 0000000..b0c9cf4 --- /dev/null +++ b/src/hasher/builder.rs @@ -0,0 +1,115 @@ +use core::{hash::BuildHasher, mem::MaybeUninit}; + +#[cfg(feature = "debug")] +use core::fmt::Debug; + +use getrandom::getrandom_uninit; + +use crate::WyHash; + +#[cfg_attr(docsrs, doc(cfg(feature = "randomised_wyhash")))] +#[derive(Clone, Copy)] +/// Randomised state constructor for [`WyHash`]. This builder will source entropy in order +/// to provide random seeds for [`WyHash`]. This will yield a hasher with not just a random +/// seed, but also a new random secret, granting extra protection against DOS and prediction +/// attacks. +pub struct RandomWyHashState(u64, u64); + +impl RandomWyHashState { + /// Create a new [`RandomWyHashState`] instance. Calling this method will attempt to + /// draw entropy from hardware/OS sources. + /// + /// # Panics + /// + /// This method will panic if it was unable to source enough entropy. + /// + /// # Examples + /// + /// ``` + /// use wyrand::RandomWyHashState; + /// use core::hash::BuildHasher; + /// + /// let s = RandomWyHashState::new(); + /// + /// let mut hasher = s.build_hasher(); // Creates a WyHash instance with random state + /// ``` + #[must_use] + pub fn new() -> Self { + // Don't bother zeroing as we will initialise this with random data. 
If the initialisation fails + // for any reason, we will panic instead of trying to continue with a fully or partially + // uninitialised buffer. This ensures the whole process is safe without the need to use an + // unsafe block. + let mut bytes = [MaybeUninit::<u8>::uninit(); core::mem::size_of::<u64>() * 2]; + + let bytes = getrandom_uninit(&mut bytes) + .expect("Failed to source entropy for WyHash randomised state"); + + let (first, second) = bytes.split_at(core::mem::size_of::<u64>()); + + let first = u64::from_ne_bytes(first.try_into().unwrap()); + let second = u64::from_ne_bytes(second.try_into().unwrap()); + + Self(first, second) + } +} + +impl BuildHasher for RandomWyHashState { + type Hasher = WyHash; + + #[inline] + fn build_hasher(&self) -> Self::Hasher { + WyHash::new(self.0, self.1) + } +} + +impl Default for RandomWyHashState { + #[inline] + fn default() -> Self { + Self::new() + } +} + +#[cfg(feature = "debug")] +impl Debug for RandomWyHashState { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("RandomWyHashState") + .finish_non_exhaustive() + } +} + +#[cfg(test)] +mod tests { + extern crate alloc; + + use super::*; + + #[cfg(feature = "debug")] + #[test] + fn no_leaking_debug() { + use alloc::format; + + let builder = RandomWyHashState::default(); + + assert_eq!( + format!("{builder:?}"), + "RandomWyHashState { .. }", + "Debug should not be leaking internal state" + ); + } + + #[test] + fn randomised_builder_states() { + let builder1 = RandomWyHashState::new(); + let builder2 = RandomWyHashState::new(); + + // The two builders' internal states are different to each other + assert_ne!(&builder1.0, &builder2.0); + assert_ne!(&builder1.1, &builder2.1); + + // Each builder's internal state should not be the same (hopefully). + // It is more likely that we have not initialised things correctly than + // to have the entropy source output the same bits for both fields. 
+ assert_ne!(&builder1.0, &builder1.1); + assert_ne!(&builder2.0, &builder2.1); + } +} diff --git a/src/hasher/read.rs b/src/hasher/read.rs index bf9df8e..1ab64aa 100644 --- a/src/hasher/read.rs +++ b/src/hasher/read.rs @@ -12,17 +12,12 @@ pub(super) const fn wyread64(bits: &[u8]) -> u64 { #[inline(always)] pub(super) const fn wyread32(bits: &[u8]) -> u64 { - (bits[3] as u64) << 24 - | (bits[2] as u64) << 16 - | (bits[1] as u64) << 8 - | (bits[0] as u64) + (bits[3] as u64) << 24 | (bits[2] as u64) << 16 | (bits[1] as u64) << 8 | (bits[0] as u64) } #[inline(always)] pub(super) const fn wyread_upto_24(bits: &[u8]) -> u64 { - (bits[0] as u64) << 16 - | (bits[bits.len() >> 1] as u64) << 8 - | (bits[bits.len() - 1] as u64) + (bits[0] as u64) << 16 | (bits[bits.len() >> 1] as u64) << 8 | (bits[bits.len() - 1] as u64) } #[inline(always)] diff --git a/src/lib.rs b/src/lib.rs index c98db17..7d4b78d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,5 @@ mod rand; mod utils; #[cfg(feature = "wyhash")] -#[cfg_attr(docsrs, doc(cfg(feature = "wyhash")))] -pub use hasher::WyHash; +pub use hasher::*; pub use rand::WyRand; diff --git a/src/rand.rs b/src/rand.rs index a5cc50d..c233cf8 100644 --- a/src/rand.rs +++ b/src/rand.rs @@ -24,7 +24,7 @@ impl WyRand { /// hardware, OS source, or from a suitable crate, like `getrandom`. 
#[inline] #[must_use] - pub fn new(state: u64) -> Self { + pub const fn new(state: u64) -> Self { Self { state } } @@ -124,7 +124,7 @@ mod tests { // Should be the same internal state after cloning assert_eq!( - &rng, &cloned, + &rng.state, &cloned.state, "the two RNG instances are not the same after cloning" ); @@ -132,7 +132,7 @@ mod tests { // Should no longer have the same internal state after generating a random number assert_ne!( - &rng, &cloned, + &rng.state, &cloned.state, "the two RNG instances are the same after one was used" ); } From 553ae8b30e4d58585d5b3f3fdfe7f848bd824789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gon=C3=A7alo=20Rica=20Pais=20da=20Silva?= Date: Thu, 29 Feb 2024 18:47:06 +0100 Subject: [PATCH 2/2] docs: Clarify WyHash version kinds --- src/hasher.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hasher.rs b/src/hasher.rs index 53efae6..9e44c07 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -24,8 +24,8 @@ use self::{ }; /// The WyHash hasher, a fast & portable hashing algorithm. This implementation is -/// based on the final v4 C reference implementation, as that is compatible with -/// the constants used for the current `WyRand` implementation. +/// based on the final v4/v4.2 C reference implementations (depending on whether the +/// `v4_2` feature flag is enabled or not). /// /// ``` /// use wyrand::WyHash;