diff --git a/Cargo.toml b/Cargo.toml index 4096866..c5e6c48 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,8 @@ document-features = "0.2.10" clap = { version = "4.4.8", features = ["derive"] } futures = "0.3.29" tracing-subscriber = "0.3" +ctrlc = "3.4.5" +histo = "1.0.0" rayon = "1.5" dashmap = "6.1" diff --git a/examples/count_ips_close_to_key.rs b/examples/count_ips_close_to_key.rs new file mode 100644 index 0000000..2d40e7d --- /dev/null +++ b/examples/count_ips_close_to_key.rs @@ -0,0 +1,159 @@ +/** + * Counts all IP addresses around a random target ID and counts the number of hits each IP gets. + * Does this by initializing a new DHT node for each lookup to reach the target from different directions. + * + * The result shows how sloppy the lookup algorithms are. + * +Prints a histogram with the collected nodes. +The first column shows the buckets indicating the hit rate. 3 .. 12 summarizes the nodes that get hit with a probability of 3 to 12% in each lookup. +The second column indicates the number of nodes that this bucket contains. [19] means 19 nodes got hit with a probability of 3 to 12%. +The third column is a visualization of the number of nodes [19]. + +Example1: +3 .. 12 [ 19 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ +Within one lookup, 19 nodes got hit in 3 to 12% of the cases. These nodes are therefore rarely found. + +Example2: +84 .. 93 [ 15 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ +Within one lookup, 15 nodes got hit in 84 to 93% of the cases. These nodes are therefore found in almost all lookups. + +Full example: +3 .. 12 [ 19 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ +12 .. 21 [ 2 ]: ∎∎ +21 .. 30 [ 3 ]: ∎∎∎ +30 .. 39 [ 2 ]: ∎∎ +39 .. 48 [ 3 ]: ∎∎∎ +48 .. 57 [ 0 ]: +57 .. 66 [ 0 ]: +66 .. 75 [ 0 ]: +75 .. 84 [ 1 ]: ∎ +84 .. 93 [ 15 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ +*/ +use histo::Histogram; +use mainline::{Dht, Id, Node}; +use std::{ + collections::{HashMap, HashSet}, + net::IpAddr, + sync::mpsc::channel, +}; +use tracing::Level; + +const K: usize = 20; // Not really k but we take the k closest nodes into account. 
+const MAX_DISTANCE: u8 = 150; // Health check to not include outrageously distant nodes. +const USE_RANDOM_BOOTSTRAP_NODES: bool = false; + +fn main() { + tracing_subscriber::fmt().with_max_level(Level::WARN).init(); + + let target = Id::random(); + let mut ip_hits: HashMap = HashMap::new(); + let (tx_interrupted, rx_interrupted) = channel(); + + println!("Count all IP addresses around a random target_key={target} k={K} max_distance={MAX_DISTANCE} random_boostrap={USE_RANDOM_BOOTSTRAP_NODES}."); + println!("Press CTRL+C to show the histogram"); + println!(); + + ctrlc::set_handler(move || { + println!(); + println!("Received Ctrl+C! Finishing current lookup. Hold on..."); + tx_interrupted.send(()).unwrap(); + }) + .expect("Error setting Ctrl-C handler"); + + let mut last_nodes: HashSet = HashSet::new(); + let mut lookup_count = 0; + while rx_interrupted.try_recv().is_err() { + lookup_count += 1; + let mut dht = init_dht(USE_RANDOM_BOOTSTRAP_NODES); + let nodes = dht.find_node(target).unwrap(); + let nodes: Vec = nodes + .into_iter() + .filter(|node| target.distance(node.id()) < MAX_DISTANCE) + .collect(); + let closest_nodes: Vec = nodes.into_iter().take(K).collect(); + let sockets: HashSet = closest_nodes + .iter() + .map(|node| node.address().ip()) + .collect(); + for socket in sockets.iter() { + let previous = ip_hits.get(socket); + match previous { + Some(val) => { + ip_hits.insert(socket.clone(), val + 1); + } + None => { + ip_hits.insert(socket.clone(), 1); + } + }; + } + + if closest_nodes.is_empty() { + continue; + } + let closest_node = closest_nodes.first().unwrap(); + let closest_distance = target.distance(closest_node.id()); + let furthest_node = closest_nodes.last().unwrap(); + let furthest_distance = target.distance(furthest_node.id()); + + let overlap_with_last_lookup: HashSet = sockets + .intersection(&last_nodes) + .map(|ip| ip.clone()) + .collect(); + let overlap = overlap_with_last_lookup.len() as f64 / K as f64; + last_nodes = sockets; + 
println!( + "lookup={:02} Ips found {}. Closest node distance: {}, furthest node distance: {}, overlap with previous lookup {}%", + lookup_count, + ip_hits.len(), + closest_distance, + furthest_distance, + (overlap*100 as f64) as usize + ); + dht.shutdown(); + } + + println!(); + println!("Histogram"); + print_histogram(ip_hits, lookup_count); +} + +fn print_histogram(hits: HashMap, lookup_count: usize) { + /* + + */ + let mut histogram = Histogram::with_buckets(10); + let percents: HashMap = hits + .into_iter() + .map(|(ip, hits)| { + let percent = (hits as f32 / lookup_count as f32) * 100 as f32; + (ip, percent as u64) + }) + .collect(); + + for (_, percent) in percents.iter() { + histogram.add(percent.clone()); + } + + println!("{}", histogram); +} + +fn get_random_boostrap_nodes2() -> Vec { + let mut dht = Dht::client().unwrap(); + let nodes = dht.find_node(Id::random()).unwrap(); + dht.shutdown(); + let addrs: Vec = nodes + .into_iter() + .map(|node| node.address().to_string()) + .collect(); + let slice: Vec = addrs[..8].into_iter().map(|va| va.clone()).collect(); + slice +} + +fn init_dht(use_random_boostrap_nodes: bool) -> Dht { + if use_random_boostrap_nodes { + let bootstrap = get_random_boostrap_nodes2(); + return Dht::builder().bootstrap(&bootstrap).build().unwrap(); + } else { + Dht::client().unwrap() + } +}