From fe8c79d7c3a97044ffbd36476b9d7dc7037e7592 Mon Sep 17 00:00:00 2001 From: Narthana Epa Date: Sun, 23 Jul 2023 20:32:15 +1000 Subject: [PATCH] Add chi-square test and fix a typo that made it not uniform --- Cargo.lock | 193 +++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 3 + src/main.rs | 30 +------ src/passphrase.rs | 67 ++++++++++++++++ 4 files changed, 264 insertions(+), 29 deletions(-) create mode 100644 src/passphrase.rs diff --git a/Cargo.lock b/Cargo.lock index b8ee14a..fb756d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -60,12 +60,33 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bytemuck" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" + [[package]] name = "cc" version = "1.0.79" @@ -110,7 +131,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.10", ] [[package]] @@ -219,24 +240,115 @@ version = "0.2.134" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb" +[[package]] +name = "libm" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" + [[package]] name = "linux-raw-sys" version 
= "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" +[[package]] +name = "matrixmultiply" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "090126dc04f95dc0d1c1c91f61bdd474b3930ca064c1edc8a849da2c6cbe1e77" +dependencies = [ + "autocfg", + "rawpointer", +] + [[package]] name = "memchr" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "nalgebra" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d506eb7e08d6329505faa8a3a00a5dcc6de9f76e0c77e4b75763ae3c770831ff" +dependencies = [ + "approx", + "matrixmultiply", + "nalgebra-macros", + "num-complex", + "num-rational", + "num-traits", + "rand", + "rand_distr", + "simba", + "typenum", +] + +[[package]] +name = "nalgebra-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "num-complex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + 
+[[package]] +name = "num-traits" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +dependencies = [ + "autocfg", + "libm", +] + [[package]] name = "once_cell" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + [[package]] name = "ppv-lite86" version = "0.2.16" @@ -291,6 +403,22 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + [[package]] name = "regex" version = "1.9.1" @@ -329,6 +457,7 @@ dependencies = [ "lazy_static", "rand", "regex", + "statrs", ] [[package]] @@ -345,12 +474,58 @@ dependencies = [ "windows-sys 0.45.0", ] +[[package]] +name = "safe_arch" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f398075ce1e6a179b46f51bd88d0598b92b00d3551f1a2d4ac49e771b56ac354" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "simba" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0b7840f121a46d63066ee7a99fc81dcabbc6105e437cae43528cea199b5a05f" +dependencies = [ + "approx", + "num-complex", + "num-traits", + "paste", + "wide", +] + +[[package]] +name = "statrs" +version = "0.16.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d08e5e1748192713cc281da8b16924fb46be7b0c2431854eadc785823e5696e" +dependencies = [ + "approx", + "lazy_static", + "nalgebra", + "num-traits", + "rand", +] + [[package]] name = "strsim" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.10" @@ -362,6 +537,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "typenum" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" + [[package]] name = "unicode-ident" version = "1.0.4" @@ -380,6 +561,16 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wide" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa469ffa65ef7e0ba0f164183697b89b854253fd31aeb92358b7b6155177d62f" +dependencies = [ + "bytemuck", + "safe_arch", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 0122895..ef3e730 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,6 @@ eyre = "0.6.8" lazy_static = "1.4.0" rand = "0.8.5" regex = "1.9.1" + +[dev-dependencies] +statrs = "0.16.0" diff --git a/src/main.rs b/src/main.rs index 9eb8a03..b0b5f56 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,8 @@ +mod passphrase; mod words; use clap::Parser; use eyre::Result; -use rand::{rngs::ThreadRng, Rng}; #[derive(Parser, 
Debug)]
 #[command(author, version, about, long_about = None)]
@@ -24,34 +24,8 @@ fn main() -> Result<()> {
 
     println!(
         "{}",
-        passphrase(&mut rng, &mut words, args.num_words, &args.separator)
+        passphrase::new(&mut rng, &mut words, args.num_words, &args.separator)
     );
 
     Ok(())
 }
-
-fn passphrase(
-    rng: &mut ThreadRng,
-    words: &mut Vec<String>,
-    num_words: usize,
-    separator: &str,
-) -> String {
-    if words.len() < num_words {
-        eprintln!(
-            "Your dictionary only has {} suitable words, but you asked for {} words.",
-            words.len(),
-            num_words
-        );
-        return "".to_string();
-    }
-
-    (0..num_words).for_each(|i| {
-        let j = rng.gen_range(0..words.len());
-        words.swap(i, j)
-    });
-
-    (0..num_words)
-        .map(|i| words[i].to_owned())
-        .collect::<Vec<_>>()
-        .join(separator)
-}
diff --git a/src/passphrase.rs b/src/passphrase.rs
new file mode 100644
index 0000000..4a6a58d
--- /dev/null
+++ b/src/passphrase.rs
@@ -0,0 +1,67 @@
+use rand::{rngs::ThreadRng, Rng};
+
+/// Joins `num_words` distinct, uniformly chosen entries of `words` with `separator`.
+pub fn new(
+    rng: &mut ThreadRng,
+    words: &mut Vec<String>,
+    num_words: usize,
+    separator: &str,
+) -> String {
+    if words.len() < num_words {
+        eprintln!(
+            "Your dictionary only has {} suitable words, but you asked for {} words.",
+            words.len(),
+            num_words
+        );
+        return "".to_string(); // nothing to generate: return an empty passphrase
+    }
+
+    (0..num_words).for_each(|i| {
+        let j = rng.gen_range(i..words.len()); // partial Fisher-Yates: draw from the tail only
+        words.swap(i, j)
+    });
+
+    (0..num_words)
+        .map(|i| words[i].to_owned())
+        .collect::<Vec<_>>()
+        .join(separator)
+}
+
+#[cfg(test)]
+mod test {
+    #[test]
+    fn read_words() {
+        use crate::{passphrase, words};
+        use statrs::distribution::{ChiSquared, ContinuousCDF};
+        use std::collections::HashMap;
+
+        let n = 12_000_000;
+        let mut rng = rand::thread_rng();
+
+        // this test file has 4 words, which can have 24 permutations
+        let words = words::list(Some("src/fixtures/test")).unwrap();
+
+        let mut histogram: HashMap<String, u32> = HashMap::new();
+        (0..n).for_each(|_| {
+            let mut words = words.clone();
+            let s = passphrase::new(&mut rng, &mut words, 4, " ");
+            *histogram.entry(s).or_insert(0) += 1;
+        });
+
+        assert_eq!(histogram.len(), 24);
+
+        let expected_frequency = n as f64 / 24.0;
+        let chi_squared_stat: f64 = histogram
+            .values()
+            .map(|&v| (v as f64 - expected_frequency).powi(2) / expected_frequency)
+            .sum();
+        let dist = ChiSquared::new((24 - 1) as f64).unwrap(); // df = categories - 1
+        let p = 1.0 - dist.cdf(chi_squared_stat); // upper tail: P(X >= observed statistic)
+
+        eprintln!("χ^2: {}\np: {}", chi_squared_stat, p);
+
+        // the p-value should be greater than 0.05 so that we can't reject the null hypothesis
+        // if we can reject the null hypothesis, then the passphrase generator is not uniform
+        assert!(p > 0.05, "uniformity rejected: p = {}", p);
+    }
+}