diff --git a/docs/dht_size_estimate/README.md b/docs/dht_size_estimate/README.md index db79d137..6155a56f 100644 --- a/docs/dht_size_estimate/README.md +++ b/docs/dht_size_estimate/README.md @@ -57,7 +57,7 @@ The final Dht size estimation is the average of `en_1 + en_2 + .. + en_n` Running this [simulation](./src/main.rs) for 20 million nodes and a after 12 lookups, we observe: -- Mean estimate: 2,004,408 nodes +- Mean estimate: 1,998,382 nodes - Standard deviation: 10% Meaning that after 12 lookups, you can be confident you are not overestimating the Dht size by more than 10%, diff --git a/docs/dht_size_estimate/plot.png b/docs/dht_size_estimate/plot.png index cf7f9929..31fac34e 100644 Binary files a/docs/dht_size_estimate/plot.png and b/docs/dht_size_estimate/plot.png differ diff --git a/src/rpc/closest_nodes.rs b/src/rpc/closest_nodes.rs index f405e22d..baf2e652 100644 --- a/src/rpc/closest_nodes.rs +++ b/src/rpc/closest_nodes.rs @@ -2,8 +2,6 @@ use std::{convert::TryInto, vec::IntoIter}; use crate::{common::MAX_BUCKET_SIZE_K, Id, Node}; -const CORRECTION_FACTOR: f64 = 1.0544; - #[derive(Debug, Clone)] /// Manage closest nodes found in a query. /// @@ -61,25 +59,31 @@ impl ClosestNodes { return 0; }; - let sum = self.nodes.iter().take(20).enumerate().fold( - 0, - |sum: usize, (i, node): (usize, &Node)| { - let xor = node.id.xor(&self.target); + let mut sum = 0; + let mut count = 0; + + // Ignoring the first node, as that gives the best result in simulations. + for node in &self.nodes[1..] { + count += 1; + + let xor = node.id.xor(&self.target); - // Round up the lower 4 bytes to get a u128 from u160. - let distance = - u128::from_be_bytes(xor.as_bytes()[0..16].try_into().expect("infallible")); + // Round up the lower 4 bytes to get a u128 from u160. + let distance = + u128::from_be_bytes(xor.as_bytes()[0..16].try_into().expect("infallible")); - let intervals = (u128::MAX / distance) as usize; - let estimated_n = intervals.saturating_mul(i); + let intervals = (u128::MAX / distance) as usize; - sum + estimated_n as usize - }, - ); + let estimated_n = intervals * count; - let count = MAX_BUCKET_SIZE_K.min(self.nodes.len()); + sum += estimated_n as usize; + + if count >= MAX_BUCKET_SIZE_K { + break; + } + } - (CORRECTION_FACTOR * (sum / count) as f64) as usize + (sum / count) as usize } } @@ -113,13 +117,13 @@ mod tests { let mut closest_nodes = ClosestNodes::new(target); - for _ in 0..10 { + for _ in 0..100 { let node = Node::random(); closest_nodes.add(node.clone()); closest_nodes.add(node); } - assert_eq!(closest_nodes.nodes().len(), 10); + assert_eq!(closest_nodes.nodes().len(), 100); let distances = closest_nodes .nodes()