diff --git a/rsky-pds/src/repo/mst/mod.rs b/rsky-pds/src/repo/mst/mod.rs index d383a37..7acb961 100644 --- a/rsky-pds/src/repo/mst/mod.rs +++ b/rsky-pds/src/repo/mst/mod.rs @@ -12,7 +12,6 @@ * For atproto, we use SHA-256 as the key hashing algorithm, and ~4 fanout * (2-bits of zero per layer). */ - use crate::common; use crate::common::ipld; use crate::repo::block_map::BlockMap; @@ -195,10 +194,10 @@ impl Iterator for NodeIterReachable { /// treeEntry are elements of nodeData's Entries. #[derive(Debug, PartialEq, Clone, Deserialize, Serialize)] pub struct TreeEntry { - pub p: u8, // count of characters shared with previous path/key in tree + pub p: u8, // count of characters shared with previous path/key in tree #[serde(with = "serde_bytes")] - pub k: Vec, // remaining part of path/key (appended to "previous key") - pub v: Cid, // CID pointer at this path/key + pub k: Vec, // remaining part of path/key (appended to "previous key") + pub v: Cid, // CID pointer at this path/key pub t: Option, // [optional] pointer to lower-level subtree to the "right" of this path/key entry } @@ -647,8 +646,7 @@ impl MST { (Some(NodeEntry::MST(mut p)), Some(NodeEntry::MST(n))) => { let merged = p.append_merge(n)?; let mut new_tree_entries: Vec = Vec::new(); - new_tree_entries - .append(&mut self.slice(Some(0), Some(index - 1))?); + new_tree_entries.append(&mut self.slice(Some(0), Some(index - 1))?); new_tree_entries.push(NodeEntry::MST(merged)); new_tree_entries.append(&mut self.slice(Some(index + 2), None)?); self.new_tree(new_tree_entries) @@ -912,11 +910,7 @@ impl MST { pub fn create_child(&mut self) -> Result { let layer = self.get_layer()?; - MST::create( - self.storage.clone(), - Some(Vec::new()), - Some(layer - 1), - ) + MST::create(self.storage.clone(), Some(Vec::new()), Some(layer - 1)) } pub fn create_parent(mut self) -> Result { @@ -936,14 +930,10 @@ impl MST { /// finds index of first leaf node that is greater than or equal to the value pub fn find_gt_or_equal_leaf_index(&mut self, key: &String) -> Result { let entries = self.get_entries()?; - let maybe_index = entries - .iter() - .position(|entry| { - match entry { - NodeEntry::MST(_) => false, - NodeEntry::Leaf(entry) => entry.key >= *key - } - }); + let maybe_index = entries.iter().position(|entry| match entry { + NodeEntry::MST(_) => false, + NodeEntry::Leaf(entry) => entry.key >= *key, + }); // if we can't find, we're on the end if let Some(i) = maybe_index { Ok(i as isize) @@ -1187,14 +1177,46 @@ pub mod walker; #[cfg(test)] mod tests { + use super::util::*; use super::*; use anyhow::Result; - use super::util::*; + use rand::seq::SliceRandom; + use rand::thread_rng; fn string_to_vec_u8(input: &str) -> Vec { input.as_bytes().to_vec() } + #[test] + fn adds_records() -> Result<()> { + let mut storage = + SqlRepoReader::new(None, "did:example:123456789abcdefghi".to_string(), None); + let mapping = generate_bulk_data_keys(254, Some(&mut storage))?; + let mut mst = MST::create(storage, None, None)?; + let mut rng = thread_rng(); + + let mut entries = mapping + .iter() + .map(|e| (e.0.clone(), e.1.clone())) + .collect::>(); + entries.shuffle(&mut rng); + + for entry in &entries { + let start = std::time::Instant::now(); + mst = mst.add(&entry.0, entry.1, None)?; + let duration = start.elapsed(); + println!("Time:{:?}, Key:{}, Cid:{:?}", duration, &entry.0, entry.1); + } + for entry in entries { + let got = mst.get(&entry.0)?; + assert_eq!(Some(entry.1), got); + } + let total_size = mst.leaf_count()?; + assert_eq!(total_size, 254); + + Ok(()) + } + #[test] fn test_leading_zeros() -> Result<()> { let msg = "MST 'depth' computation (SHA-256 leading zeros)"; @@ -1232,7 +1254,12 @@ mod tests { // Helper macro to handle assertions macro_rules! assert_prefix_len { ($a:expr, $b:expr, $expected:expr) => { - assert_eq!(count_prefix_len($a.to_string(), $b.to_string())?, $expected, "{}", msg); + assert_eq!( + count_prefix_len($a.to_string(), $b.to_string())?, + $expected, + "{}", + msg + ); }; } @@ -1259,13 +1286,18 @@ mod tests { // Testing string lengths (Note: length in bytes, not characters) assert_eq!("jalapeΓ±o".len(), 9, "{}", msg); // 9 bytes in Rust, same as Go - assert_eq!("πŸ’©".len(), 4, "{}", msg); // 4 bytes in Rust, same as Go - assert_eq!("πŸ‘©β€πŸ‘§β€πŸ‘§".len(), 18, "{}", msg); // 18 bytes in Rust, same as Go + assert_eq!("πŸ’©".len(), 4, "{}", msg); // 4 bytes in Rust, same as Go + assert_eq!("πŸ‘©β€πŸ‘§β€πŸ‘§".len(), 18, "{}", msg); // 18 bytes in Rust, same as Go // Helper macro to handle assertions for count_prefix_len macro_rules! assert_prefix_len { ($a:expr, $b:expr, $expected:expr) => { - assert_eq!(count_prefix_len($a.to_string(), $b.to_string())?, $expected, "{}", msg); + assert_eq!( + count_prefix_len($a.to_string(), $b.to_string())?, + $expected, + "{}", + msg + ); }; } @@ -1334,7 +1366,13 @@ mod tests { assert!(result.is_ok()); // Allows URL-safe chars - let valid_keys = vec!["coll/key0", "coll/key_", "coll/key:", "coll/key.", "coll/key-"]; + let valid_keys = vec![ + "coll/key0", + "coll/key_", + "coll/key:", + "coll/key.", + "coll/key-", + ]; for key in valid_keys { let result = mst.add(&key.to_string(), cid1, None); assert!(result.is_ok(), "Key '{}' should be valid", key); @@ -1352,7 +1390,10 @@ mod tests { let mut mst = MST::create(storage, None, None)?; assert_eq!(mst.clone().leaf_count()?, 0); - assert_eq!(mst.get_pointer()?.to_string(), "bafyreie5737gdxlw5i64vzichcalba3z2v5n6icifvx5xytvske7mr3hpm"); + assert_eq!( + mst.get_pointer()?.to_string(), + "bafyreie5737gdxlw5i64vzichcalba3z2v5n6icifvx5xytvske7mr3hpm" + ); Ok(()) } @@ -1366,7 +1407,10 @@ mod tests { mst = mst.add(&"com.example.record/3jqfcqzm3fo2j".to_string(), cid1, None)?; assert_eq!(mst.clone().leaf_count()?, 1); - assert_eq!(mst.get_pointer()?.to_string(), "bafyreibj4lsc3aqnrvphp5xmrnfoorvru4wynt6lwidqbm2623a6tatzdu"); + assert_eq!( + mst.get_pointer()?.to_string(), + "bafyreibj4lsc3aqnrvphp5xmrnfoorvru4wynt6lwidqbm2623a6tatzdu" + ); Ok(()) } @@ -1381,7 +1425,10 @@ mod tests { mst = mst.add(&"com.example.record/3jqfcqzm3fx2j".to_string(), cid1, None)?; assert_eq!(mst.clone().leaf_count()?, 1); assert_eq!(mst.clone().layer, Some(2)); - assert_eq!(mst.get_pointer()?.to_string(), "bafyreih7wfei65pxzhauoibu3ls7jgmkju4bspy4t2ha2qdjnzqvoy33ai"); + assert_eq!( + mst.get_pointer()?.to_string(), + "bafyreih7wfei65pxzhauoibu3ls7jgmkju4bspy4t2ha2qdjnzqvoy33ai" + ); Ok(()) } @@ -1399,7 +1446,10 @@ mod tests { let mut mst = mst.add(&"com.example.record/3jqfcqzm3ft2j".to_string(), cid1, None)?; // level 0 let mut mst = mst.add(&"com.example.record/3jqfcqzm4fc2j".to_string(), cid1, None)?; // level 0 assert_eq!(mst.clone().leaf_count()?, 5); - assert_eq!(mst.get_pointer()?.to_string(), "bafyreicmahysq4n6wfuxo522m6dpiy7z7qzym3dzs756t5n7nfdgccwq7m"); + assert_eq!( + mst.get_pointer()?.to_string(), + "bafyreicmahysq4n6wfuxo522m6dpiy7z7qzym3dzs756t5n7nfdgccwq7m" + ); Ok(()) } @@ -1462,7 +1512,7 @@ mod tests { let mut mst = mst.add(&"com.example.record/3jqfcqzm3fr2j".to_string(), cid1, None)?; // C; level 0 let mut mst = mst.add(&"com.example.record/3jqfcqzm3fs2j".to_string(), cid1, None)?; // D; level 1 let mut mst = mst.add(&"com.example.record/3jqfcqzm3ft2j".to_string(), cid1, None)?; // E; level 0 - // GAP for F + // GAP for F let mut mst = mst.add(&"com.example.record/3jqfcqzm3fz2j".to_string(), cid1, None)?; // G; level 0 let mut mst = mst.add(&"com.example.record/3jqfcqzm4fc2j".to_string(), cid1, None)?; // H; level 0 let mut mst = mst.add(&"com.example.record/3jqfcqzm4fd2j".to_string(), cid1, None)?; // I; level 1 @@ -1544,4 +1594,4 @@ mod tests { Ok(()) } -} \ No newline at end of file +} diff --git a/rsky-pds/src/repo/mst/util.rs b/rsky-pds/src/repo/mst/util.rs index 37d3d87..aaf3168 100644 --- a/rsky-pds/src/repo/mst/util.rs +++ b/rsky-pds/src/repo/mst/util.rs @@ -1,11 +1,17 @@ use super::{Leaf, NodeData, NodeEntry, TreeEntry, MST}; +use crate::common; use crate::common::ipld; +use crate::common::ipld::cid_for_cbor; +use crate::common::tid::Ticker; use crate::storage::SqlRepoReader; use anyhow::{anyhow, Result}; use lazy_static::lazy_static; use lexicon_cid::Cid; +use rand::{thread_rng, Rng}; use regex::Regex; +use serde_json::json; use sha2::{Digest, Sha256}; +use std::collections::BTreeMap; use std::str; fn is_valid_chars(input: &str) -> bool { @@ -176,3 +182,44 @@ pub fn leading_zeros_on_hash(key: &Vec) -> Result { } Ok(leading_zeros) } + +pub type IdMapping = BTreeMap; + +pub fn random_cid(storage: &mut Option<&mut SqlRepoReader>) -> Result { + let record = json!({ "test": random_str(50) }); + let cid = cid_for_cbor(&record)?; + let bytes = common::struct_to_cbor(record)?; + if let Some(ref mut storage) = storage { + storage.blocks.set(cid, bytes); + } + Ok(cid) +} + +pub fn generate_bulk_data_keys( + count: usize, + mut blockstore: Option<&mut SqlRepoReader>, +) -> Result { + let mut obj: IdMapping = BTreeMap::new(); + for _ in 0..count { + let key = format!( + "com.example.record/{}", + Ticker::new().next(None).to_string() + ); + obj.insert(key, random_cid(&mut blockstore)?); + } + Ok(obj) +} + +pub fn random_str(len: usize) -> String { + const CHARSET: &[u8] = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; + let charset_len = CHARSET.len(); + let mut rng = thread_rng(); + + let result: String = (0..len) + .map(|_| { + let idx = rng.gen_range(0..charset_len); + CHARSET[idx] as char + }) + .collect(); + result +} diff --git a/rsky-pds/src/sequencer/events.rs b/rsky-pds/src/sequencer/events.rs index a6a7b6b..46222a3 100644 --- a/rsky-pds/src/sequencer/events.rs +++ b/rsky-pds/src/sequencer/events.rs @@ -10,9 +10,9 @@ use crate::repo::util::format_data_key; use anyhow::Result; use lexicon_cid::Cid; use rsky_lexicon::com::atproto::sync::AccountStatus as LexiconAccountStatus; +use rsky_syntax::aturi::AtUri; use serde::de::Error as DeserializerError; use serde::{Deserialize, Deserializer}; -use rsky_syntax::aturi::AtUri; #[derive(Debug, Clone, PartialEq, Deserialize, Serialize)] pub enum CommitEvtOpAction { diff --git a/rsky-pds/src/vendored/iroh_car/reader.rs b/rsky-pds/src/vendored/iroh_car/reader.rs index 8b01262..39038ca 100644 --- a/rsky-pds/src/vendored/iroh_car/reader.rs +++ b/rsky-pds/src/vendored/iroh_car/reader.rs @@ -65,8 +65,8 @@ mod tests { use futures::TryStreamExt; use libipld::cbor::DagCborCodec; - use libipld::Cid; use libipld::multihash::{Code, MultihashDigest}; + use libipld::Cid; use super::super::{header::CarHeaderV1, writer::CarWriter};