Skip to content

Commit

Permalink
Merge pull request #33 from SeverinAlexB/sloppyness-test
Browse files Browse the repository at this point in the history
Example: Count all IP addresses close to a target key
  • Loading branch information
Nuhvi authored Nov 27, 2024
2 parents 5ff1391 + fd05a52 commit d6fc36e
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ document-features = "0.2.10"
clap = { version = "4.4.8", features = ["derive"] }
futures = "0.3.29"
tracing-subscriber = "0.3"
ctrlc = "3.4.5"
histo = "1.0.0"
rayon = "1.5"
dashmap = "6.1"

Expand Down
159 changes: 159 additions & 0 deletions examples/count_ips_close_to_key.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/**
* Counts all IP addresses around a random target ID and counts the number of hits, each IP gets.
* Does this by initializing a new DHT node for each lookups to reach the target from different directions.
*
* The result shows how sloppy the lookup algorithms are.
*
Prints a histogram with the collected nodes
First column are the buckets indicating the hit rate. 3 .. 12 summerizes the nodes that get hit with a probability of 3 to 12% in each lookup.
Second column indicates the number of nodes that this bucket contains. [19] means 19 nodes got hit with a probability of 3 to 12%.
Third column is a visualization of the number of nodes [19].
Example1:
3 .. 12 [ 19 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
Within one lookup, 19 nodes got hit in 3 to 12% of the cases. These are rarely found therefore.
Example2:
84 .. 93 [ 15 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
Within one lookup, 15 nodes got hit in 84 to 93% of the cases. These nodes are therefore found in almost all lookups.
Full example:
3 .. 12 [ 19 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
12 .. 21 [ 2 ]: ∎∎
21 .. 30 [ 3 ]: ∎∎∎
30 .. 39 [ 2 ]: ∎∎
39 .. 48 [ 3 ]: ∎∎∎
48 .. 57 [ 0 ]:
57 .. 66 [ 0 ]:
66 .. 75 [ 0 ]:
75 .. 84 [ 1 ]: ∎
84 .. 93 [ 15 ]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
*/
use histo::Histogram;
use mainline::{Dht, Id, Node};
use std::{
collections::{HashMap, HashSet},
net::IpAddr,
sync::mpsc::channel,
};
use tracing::Level;

const K: usize = 20; // Not really k but we take the k closest nodes into account.
const MAX_DISTANCE: u8 = 150; // Health check to not include outrageously distant nodes.
const USE_RANDOM_BOOTSTRAP_NODES: bool = false;

fn main() {
tracing_subscriber::fmt().with_max_level(Level::WARN).init();

let target = Id::random();
let mut ip_hits: HashMap<IpAddr, u16> = HashMap::new();
let (tx_interrupted, rx_interrupted) = channel();

println!("Count all IP addresses around a random target_key={target} k={K} max_distance={MAX_DISTANCE} random_boostrap={USE_RANDOM_BOOTSTRAP_NODES}.");
println!("Press CTRL+C to show the histogram");
println!();

ctrlc::set_handler(move || {
println!();
println!("Received Ctrl+C! Finishing current lookup. Hold on...");
tx_interrupted.send(()).unwrap();
})
.expect("Error setting Ctrl-C handler");

let mut last_nodes: HashSet<IpAddr> = HashSet::new();
let mut lookup_count = 0;
while rx_interrupted.try_recv().is_err() {
lookup_count += 1;
let mut dht = init_dht(USE_RANDOM_BOOTSTRAP_NODES);
let nodes = dht.find_node(target).unwrap();
let nodes: Vec<Node> = nodes
.into_iter()
.filter(|node| target.distance(node.id()) < MAX_DISTANCE)
.collect();
let closest_nodes: Vec<Node> = nodes.into_iter().take(K).collect();
let sockets: HashSet<IpAddr> = closest_nodes
.iter()
.map(|node| node.address().ip())
.collect();
for socket in sockets.iter() {
let previous = ip_hits.get(socket);
match previous {
Some(val) => {
ip_hits.insert(socket.clone(), val + 1);
}
None => {
ip_hits.insert(socket.clone(), 1);
}
};
}

if closest_nodes.is_empty() {
continue;
}
let closest_node = closest_nodes.first().unwrap();
let closest_distance = target.distance(closest_node.id());
let furthest_node = closest_nodes.last().unwrap();
let furthest_distance = target.distance(furthest_node.id());

let overlap_with_last_lookup: HashSet<IpAddr> = sockets
.intersection(&last_nodes)
.map(|ip| ip.clone())
.collect();
let overlap = overlap_with_last_lookup.len() as f64 / K as f64;
last_nodes = sockets;
println!(
"lookup={:02} Ips found {}. Closest node distance: {}, furthest node distance: {}, overlap with previous lookup {}%",
lookup_count,
ip_hits.len(),
closest_distance,
furthest_distance,
(overlap*100 as f64) as usize
);
dht.shutdown();
}

println!();
println!("Histogram");
print_histogram(ip_hits, lookup_count);
}

fn print_histogram(hits: HashMap<IpAddr, u16>, lookup_count: usize) {
/*
*/
let mut histogram = Histogram::with_buckets(10);
let percents: HashMap<IpAddr, u64> = hits
.into_iter()
.map(|(ip, hits)| {
let percent = (hits as f32 / lookup_count as f32) * 100 as f32;
(ip, percent as u64)
})
.collect();

for (_, percent) in percents.iter() {
histogram.add(percent.clone());
}

println!("{}", histogram);
}

fn get_random_boostrap_nodes2() -> Vec<String> {
let mut dht = Dht::client().unwrap();
let nodes = dht.find_node(Id::random()).unwrap();
dht.shutdown();
let addrs: Vec<String> = nodes
.into_iter()
.map(|node| node.address().to_string())
.collect();
let slice: Vec<String> = addrs[..8].into_iter().map(|va| va.clone()).collect();
slice
}

fn init_dht(use_random_boostrap_nodes: bool) -> Dht {
if use_random_boostrap_nodes {
let bootstrap = get_random_boostrap_nodes2();
return Dht::builder().bootstrap(&bootstrap).build().unwrap();
} else {
Dht::client().unwrap()
}
}

0 comments on commit d6fc36e

Please sign in to comment.