Skip to content

Commit

Permalink
Add chi-square test and fix a typo that made it not uniform
Browse files Browse the repository at this point in the history
  • Loading branch information
triarius committed Jul 23, 2023
1 parent 7e313f0 commit fe8c79d
Show file tree
Hide file tree
Showing 4 changed files with 264 additions and 29 deletions.
193 changes: 192 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ eyre = "0.6.8"
lazy_static = "1.4.0"
rand = "0.8.5"
regex = "1.9.1"

[dev-dependencies]
statrs = "0.16.0"
30 changes: 2 additions & 28 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
mod passphrase;
mod words;

use clap::Parser;
use eyre::Result;
use rand::{rngs::ThreadRng, Rng};

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
Expand All @@ -24,34 +24,8 @@ fn main() -> Result<()> {

println!(
"{}",
passphrase(&mut rng, &mut words, args.num_words, &args.separator)
passphrase::new(&mut rng, &mut words, args.num_words, &args.separator)
);

Ok(())
}

/// Builds a passphrase of `num_words` distinct words taken from `words`, joined by `separator`.
///
/// The first `num_words` slots of `words` are filled by a partial Fisher–Yates shuffle, so the
/// vector is reordered in place as a side effect.
///
/// If `words` holds fewer than `num_words` entries, a message is written to stderr and an empty
/// string is returned.
fn passphrase(
    rng: &mut ThreadRng,
    words: &mut Vec<String>,
    num_words: usize,
    separator: &str,
) -> String {
    if words.len() < num_words {
        eprintln!(
            "Your dictionary only has {} suitable words, but you asked for {} words.",
            words.len(),
            num_words
        );
        return String::new();
    }

    // The swap partner must come from the not-yet-fixed suffix `i..len`. Drawing it from
    // `0..len` (the "naive shuffle") makes some orderings more likely than others, so the
    // resulting passphrases would not be uniformly distributed.
    for i in 0..num_words {
        let j = rng.gen_range(i..words.len());
        words.swap(i, j);
    }

    // Join the selected prefix directly; no per-word clone or intermediate Vec is needed.
    words[..num_words].join(separator)
}
67 changes: 67 additions & 0 deletions src/passphrase.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use rand::{rngs::ThreadRng, Rng};

/// Generates a passphrase made of `num_words` distinct entries of `words`, glued together with
/// `separator`.
///
/// Selection is a partial Fisher–Yates shuffle: for each output slot, one of the entries at or
/// after that slot is swapped into it. `words` is therefore reordered in place.
///
/// When the dictionary is smaller than `num_words`, a diagnostic is printed to stderr and the
/// returned string is empty.
pub fn new(
    rng: &mut ThreadRng,
    words: &mut Vec<String>,
    num_words: usize,
    separator: &str,
) -> String {
    let available = words.len();
    if num_words > available {
        eprintln!(
            "Your dictionary only has {} suitable words, but you asked for {} words.",
            available, num_words
        );
        return String::new();
    }

    // Fix one slot per iteration, choosing only among entries not yet fixed.
    for slot in 0..num_words {
        let pick = rng.gen_range(slot..available);
        words.swap(slot, pick);
    }

    words[..num_words].join(separator)
}

#[cfg(test)]
mod test {
    /// Chi-square goodness-of-fit test: every ordering of the fixture words should be produced
    /// about equally often by `passphrase::new`.
    ///
    /// This is a statistical test. With a 0.05 significance level, a perfectly uniform
    /// generator is still expected to fail roughly one run in twenty.
    #[test]
    fn passphrases_are_uniform() {
        use crate::{passphrase, words};
        use statrs::distribution::{ChiSquared, ContinuousCDF};
        use std::collections::HashMap;

        // this test file has 4 words, which can have 24 permutations
        const PERMUTATIONS: usize = 24;

        let n: u32 = 12_000_000;
        let mut rng = rand::thread_rng();

        let words = words::list(Some("src/fixtures/test")).unwrap();

        // Draw exactly `n` samples so the observed counts match the `n / 24` expectation below.
        let mut histogram: HashMap<String, u32> = HashMap::new();
        for _ in 0..n {
            let mut words = words.clone();
            let s = passphrase::new(&mut rng, &mut words, 4, " ");
            *histogram.entry(s).or_insert(0) += 1;
        }

        // Every permutation must appear; a missing bin would otherwise be silently left out
        // of the statistic.
        assert_eq!(histogram.len(), PERMUTATIONS);

        let expected_frequency = f64::from(n) / PERMUTATIONS as f64;
        let chi_squared_stat: f64 = histogram
            .values()
            .map(|&count| (f64::from(count) - expected_frequency).powi(2) / expected_frequency)
            .sum();

        // Goodness-of-fit against a fully specified distribution: df = categories - 1.
        let df = (PERMUTATIONS - 1) as f64;
        let dist = ChiSquared::new(df).unwrap();

        // The p-value is the upper tail, P(X >= stat). Using the CDF itself would let a badly
        // skewed generator (huge statistic, CDF close to 1) pass the assertion below.
        let p = 1.0 - dist.cdf(chi_squared_stat);

        eprintln!("χ^2: {}", chi_squared_stat);
        eprintln!("p: {}", p);

        // the p-value should be greater than 0.05 so that we can't reject the null hypothesis
        // if we can reject the null hypothesis, then the passphrase generator is not uniform
        assert!(
            p > 0.05,
            "passphrase distribution deviates from uniform (χ^2 = {}, p = {})",
            chi_squared_stat,
            p
        );
    }
}

0 comments on commit fe8c79d

Please sign in to comment.