From 79eb3e6e3bd0443caa2691f90a391c41545fb221 Mon Sep 17 00:00:00 2001 From: nazeh Date: Wed, 23 Oct 2024 19:05:06 +0300 Subject: [PATCH] validate dht size estimation error margin <= %50 --- src/common/id.rs | 14 ----- src/dht.rs | 3 - src/rpc.rs | 9 +++ src/rpc/closest_nodes.rs | 132 ++++++++++++++++++++++++++++++++++----- 4 files changed, 125 insertions(+), 33 deletions(-) diff --git a/src/common/id.rs b/src/common/id.rs index f49a184d..1d243813 100644 --- a/src/common/id.rs +++ b/src/common/id.rs @@ -121,20 +121,6 @@ impl Id { } } } - - /// Returns the number of intervals in the keyspace if divided by the distance - /// between this id and a given `target`. - /// - /// Useful to estimate the Dht size see [crate::ClosestNodes::dht_size_estimate] - pub fn keyspace_intervals(&self, target: Id) -> usize { - let xor = self.xor(&target); - - // Round up the lower 4 bytes to get a u128 from u160. - let distance = - u128::from_be_bytes(xor.as_bytes()[0..16].try_into().expect("infallible")) + 1; - - (u128::MAX / distance) as usize - } } fn first_21_bits(bytes: &[u8]) -> [u8; 3] { diff --git a/src/dht.rs b/src/dht.rs index 73eb5656..a168190e 100644 --- a/src/dht.rs +++ b/src/dht.rs @@ -381,9 +381,6 @@ pub struct Info { /// Local UDP socket address that this node is listening on. pub local_address: Result, /// An estimate of the Dht size. - /// - /// Calculated as the average of the results of calling [RoutingTable::estimate_dht_size] on the - /// responding nodes of each get queries done in the background. pub dht_size_estimate: usize, } diff --git a/src/rpc.rs b/src/rpc.rs index 8fff4286..6b3abb0a 100644 --- a/src/rpc.rs +++ b/src/rpc.rs @@ -582,6 +582,15 @@ impl Rpc { if self.routing_table.is_empty() && self.last_table_refresh.elapsed() > REFRESH_TABLE_INTERVAL { + // Make a random query, to help the dht size estimation. 
+ let target = Id::random(); + self.get( + target, + RequestTypeSpecific::FindNode(FindNodeRequestArguments { target }), + None, + None, + ); + self.last_table_refresh = Instant::now(); self.populate(); } diff --git a/src/rpc/closest_nodes.rs b/src/rpc/closest_nodes.rs index 45042a5e..6e6eda53 100644 --- a/src/rpc/closest_nodes.rs +++ b/src/rpc/closest_nodes.rs @@ -1,6 +1,6 @@ -use std::vec::IntoIter; +use std::{convert::TryInto, vec::IntoIter}; -use crate::{Id, Node}; +use crate::{common::MAX_BUCKET_SIZE_K, Id, Node}; #[derive(Debug, Clone)] pub struct ClosestNodes { @@ -29,8 +29,17 @@ impl ClosestNodes { // === Public Methods === pub fn add(&mut self, node: Node) { - match self.nodes.binary_search_by(|item| item.id.cmp(&node.id)) { - Ok(pos) | Err(pos) => self.nodes.insert(pos, node), + let seek = node.id.xor(&self.target); + + match self.nodes.binary_search_by(|prope| { + if prope.id == node.id { + std::cmp::Ordering::Equal + } else { + prope.id.xor(&self.target).cmp(&seek) + } + }) { + Err(pos) => self.nodes.insert(pos, node), + _ => {} } } @@ -91,18 +100,29 @@ impl ClosestNodes { return 0; }; - self.nodes - .iter() - .map(|n| n.id) - .enumerate() - .fold(0, |mut sum, (idx, id)| { - let intervals = id.keyspace_intervals(self.target); - let estimated_n = intervals.saturating_mul(idx + 1); - - sum += estimated_n; - sum - }) - / self.nodes.len() + let mut sum: usize = 0; + let mut count = 0; + + for node in &self.nodes { + if count >= MAX_BUCKET_SIZE_K { + break; + } + + count += 1; + + let xor = node.id.xor(&self.target); + + // Round up the lower 4 bytes to get a u128 from u160. 
+ let distance = + u128::from_be_bytes(xor.as_bytes()[0..16].try_into().expect("infallible")) + 1; + + let intervals = (u128::MAX / distance) as usize; + let estimated_n = intervals.saturating_mul(count); + + sum = sum.saturating_add(estimated_n); + } + + (sum / count) as usize } } @@ -123,3 +143,83 @@ impl<'a> IntoIterator for &'a ClosestNodes { self.nodes.iter() } } + +#[cfg(test)] +mod tests { + use std::collections::BTreeMap; + + use super::*; + + #[test] + fn add() { + let target = Id::random(); + + let mut closest_nodes = ClosestNodes::new(target); + + for _ in 0..10 { + let node = Node::random(); + closest_nodes.add(node.clone()); + closest_nodes.add(node); + } + + assert_eq!(closest_nodes.nodes().len(), 10); + + let distances = closest_nodes + .nodes() + .iter() + .map(|n| n.id.distance(&target)) + .collect::<Vec<_>>(); + + let mut sorted = distances.clone(); + sorted.sort(); + + assert_eq!(sorted, distances); + } + + #[test] + fn simulation() { + let lookups = 10; + let acceptable_margin = 0.6; + + let tests = [2500, 25000, 250000]; + + for dht_size in tests { + let estimate = simulate(dht_size, lookups) as f64; + + let margin = (estimate - (dht_size as f64)).abs() / dht_size as f64; + + assert!(margin <= acceptable_margin); + } + } + + fn simulate(dht_size: usize, lookups: usize) -> usize { + let mut nodes = BTreeMap::new(); + + // Bootstrap + for _ in 0..dht_size { + let node = Node::random(); + nodes.insert(node.id, node); + } + + let mut estimates = vec![]; + + for _ in 0..lookups.min(dht_size) { + let target = Id::random(); + + let mut closest_nodes = ClosestNodes::new(target); + + for (_, node) in nodes.range(target..).take(20) { + closest_nodes.add(node.clone()); + } + for (_, node) in nodes.range(target..).rev().take(20) { + closest_nodes.add(node.clone()); + } + + let estimate = closest_nodes.dht_size_estimate(); + + estimates.push(estimate) + } + + estimates.iter().sum::<usize>() / estimates.len() + } +}