Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: db state sync by merk chunking #292

Merged
merged 31 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ members = [
"node-grove",
"storage",
"visualize",
"path",
"path"
]
2 changes: 2 additions & 0 deletions grovedb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ nohash-hasher = { version = "0.2.0", optional = true }
indexmap = { version = "1.9.2", optional = true }
intmap = { version = "2.0.0", optional = true }
grovedb-path = { version = "1.0.0-rc.2", path = "../path" }
blake3 = "1.4.0"
bitvec = "1"

[dev-dependencies]
rand = "0.8.5"
Expand Down
15 changes: 7 additions & 8 deletions grovedb/src/batch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ impl GroveDbOp {
}

/// Verify consistency of operations
pub fn verify_consistency_of_operations(ops: &Vec<GroveDbOp>) -> GroveDbOpConsistencyResults {
pub fn verify_consistency_of_operations(ops: &[GroveDbOp]) -> GroveDbOpConsistencyResults {
let ops_len = ops.len();
// operations should not have any duplicates
let mut repeated_ops = vec![];
Expand Down Expand Up @@ -2432,8 +2432,8 @@ mod tests {
Element::empty_tree(),
),
];
assert!(matches!(
db.apply_batch(
assert!(db
.apply_batch(
ops,
Some(BatchApplyOptions {
validate_insertion_does_not_override: false,
Expand All @@ -2446,9 +2446,8 @@ mod tests {
}),
None
)
.unwrap(),
Ok(_)
));
.unwrap()
.is_ok());
}

#[test]
Expand Down Expand Up @@ -3489,7 +3488,7 @@ mod tests {
elem.clone(),
),
];
assert!(matches!(db.apply_batch(batch, None, None).unwrap(), Ok(_)));
assert!(db.apply_batch(batch, None, None).unwrap().is_ok());
assert_eq!(
db.get([TEST_LEAF].as_ref(), b"key1", None)
.unwrap()
Expand All @@ -3506,7 +3505,7 @@ mod tests {
.unwrap()
.expect("should generate proof");
let verification_result = GroveDb::verify_query_raw(&proof, &path_query);
assert!(matches!(verification_result, Ok(_)));
assert!(verification_result.is_ok());

// Hit reference limit when you specify max reference hop, lower than actual hop
// count
Expand Down
27 changes: 22 additions & 5 deletions grovedb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,10 @@
#[cfg(feature = "full")]
mod visualize;

use std::collections::BTreeSet;
#[cfg(feature = "full")]
use std::{collections::HashMap, option::Option::None, path::Path};

#[cfg(any(feature = "full", feature = "verify"))]
use element::helpers;
#[cfg(any(feature = "full", feature = "verify"))]
pub use element::Element;
#[cfg(feature = "full")]
Expand Down Expand Up @@ -201,6 +200,8 @@
tree::{combine_hash, value_hash},
BatchEntry, CryptoHash, KVIterator, Merk,
};
#[cfg(feature = "full")]
use grovedb_merk::{proofs::Op, ChunkProducer, Restorer};

Check warning on line 204 in grovedb/src/lib.rs

View workflow job for this annotation

GitHub Actions / clippy

unused imports: `ChunkProducer`, `Restorer`, `proofs::Op`

warning: unused imports: `ChunkProducer`, `Restorer`, `proofs::Op` --> grovedb/src/lib.rs:204:20 | 204 | use grovedb_merk::{proofs::Op, ChunkProducer, Restorer}; | ^^^^^^^^^^ ^^^^^^^^^^^^^ ^^^^^^^^ | = note: `#[warn(unused_imports)]` on by default
use grovedb_path::SubtreePath;
#[cfg(feature = "full")]
use grovedb_storage::rocksdb_storage::PrefixedRocksDbImmediateStorageContext;
Expand All @@ -217,13 +218,15 @@
use grovedb_visualize::DebugByteVectors;
#[cfg(any(feature = "full", feature = "verify"))]
pub use query::{PathQuery, SizedQuery};
#[cfg(feature = "full")]
pub use replication::{BufferedRestorer, Restorer, SiblingsChunkProducer, SubtreeChunkProducer};

#[cfg(feature = "full")]
use crate::element::helpers::raw_decode;
#[cfg(any(feature = "full", feature = "verify"))]
pub use crate::error::Error;
#[cfg(feature = "full")]
use crate::helpers::raw_decode;
pub use crate::replication::StateSyncInfo;
#[cfg(feature = "full")]
use crate::replication::SubtreesMetadata;
#[cfg(feature = "full")]
use crate::util::{root_merk_optional_tx, storage_context_optional_tx};
use crate::Error::MerkError;
Expand All @@ -237,6 +240,8 @@
db: RocksDbStorage,
}

pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN];

/// Transaction
#[cfg(feature = "full")]
pub type Transaction<'db> = <RocksDbStorage as Storage<'db>>::Transaction;
Expand All @@ -246,6 +251,18 @@

#[cfg(feature = "full")]
impl GroveDb {
/// Builds a blank [`StateSyncInfo`]: no restorer, no current prefix, empty
/// sets of processed prefixes and pending chunks, and a processed-chunk
/// counter of zero.
pub fn create_state_sync_info(&self) -> StateSyncInfo {
    StateSyncInfo {
        restorer: None,
        processed_prefixes: BTreeSet::new(),
        current_prefix: None,
        pending_chunks: BTreeSet::new(),
        num_processed_chunks: 0,
    }
}

/// Opens a given path
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
let db = RocksDbStorage::default_rocksdb_with_path(path)?;
Expand Down Expand Up @@ -844,7 +861,7 @@
pub fn verify_grovedb(
&self,
transaction: TransactionArg,
) -> Result<HashMap<Vec<Vec<u8>>, (CryptoHash, CryptoHash, CryptoHash)>, Error> {

Check warning on line 864 in grovedb/src/lib.rs

View workflow job for this annotation

GitHub Actions / clippy

very complex type used. Consider factoring parts into `type` definitions

warning: very complex type used. Consider factoring parts into `type` definitions --> grovedb/src/lib.rs:864:10 | 864 | ) -> Result<HashMap<Vec<Vec<u8>>, (CryptoHash, CryptoHash, CryptoHash)>, Error> { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#type_complexity = note: `#[warn(clippy::type_complexity)]` on by default
if let Some(transaction) = transaction {
let root_merk = self
.open_transactional_merk_at_path(SubtreePath::empty(), transaction, None)
Expand All @@ -870,7 +887,7 @@
merk: Merk<S>,
path: &SubtreePath<B>,
batch: Option<&'db StorageBatch>,
) -> Result<HashMap<Vec<Vec<u8>>, (CryptoHash, CryptoHash, CryptoHash)>, Error> {

Check warning on line 890 in grovedb/src/lib.rs

View workflow job for this annotation

GitHub Actions / clippy

very complex type used. Consider factoring parts into `type` definitions

warning: very complex type used. Consider factoring parts into `type` definitions --> grovedb/src/lib.rs:890:10 | 890 | ) -> Result<HashMap<Vec<Vec<u8>>, (CryptoHash, CryptoHash, CryptoHash)>, Error> { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#type_complexity
let mut all_query = Query::new();
all_query.insert_all();

Expand Down Expand Up @@ -940,7 +957,7 @@
path: &SubtreePath<B>,
batch: Option<&'db StorageBatch>,
transaction: &Transaction,
) -> Result<HashMap<Vec<Vec<u8>>, (CryptoHash, CryptoHash, CryptoHash)>, Error> {

Check warning on line 960 in grovedb/src/lib.rs

View workflow job for this annotation

GitHub Actions / clippy

very complex type used. Consider factoring parts into `type` definitions

warning: very complex type used. Consider factoring parts into `type` definitions --> grovedb/src/lib.rs:960:10 | 960 | ) -> Result<HashMap<Vec<Vec<u8>>, (CryptoHash, CryptoHash, CryptoHash)>, Error> { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#type_complexity
let mut all_query = Query::new();
all_query.insert_all();

Expand Down
54 changes: 51 additions & 3 deletions grovedb/src/operations/auxiliary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,17 @@

#[cfg(feature = "full")]
use grovedb_costs::{
cost_return_on_error_no_add, storage_cost::key_value_cost::KeyValueStorageCost, CostResult,
CostsExt, OperationCost,
cost_return_on_error, cost_return_on_error_no_add,
storage_cost::key_value_cost::KeyValueStorageCost, CostResult, CostsExt, OperationCost,
};
use grovedb_path::SubtreePath;
#[cfg(feature = "full")]
use grovedb_storage::StorageContext;
use grovedb_storage::{Storage, StorageBatch};

use crate::util::storage_context_optional_tx;
#[cfg(feature = "full")]
use crate::{util::meta_storage_context_optional_tx, Error, GroveDb, TransactionArg};
use crate::{util::meta_storage_context_optional_tx, Element, Error, GroveDb, TransactionArg};

#[cfg(feature = "full")]
impl GroveDb {
Expand Down Expand Up @@ -118,4 +120,50 @@ impl GroveDb {
Ok(value).wrap_with_cost(cost)
})
}

// TODO: dumb traversal should not be tolerated
/// Recursively collects the paths of all subtrees found under `path`.
///
/// The returned list starts with `path` itself. Every further entry is the
/// path of an already visited subtree with the key of one of its tree
/// elements appended, so each entry begins with the segments of `path`.
///
/// `transaction` is forwarded to `storage_context_optional_tx!` when opening
/// each subtree's storage context. Costs gathered while opening contexts and
/// iterating elements are accumulated and attached to the returned result.
///
/// NOTE(review): a failure from `next_element()` is presumably returned early
/// (with the cost so far) by `cost_return_on_error!`; confirm against the
/// macro definition.
pub fn find_subtrees<B: AsRef<[u8]>>(
&self,
path: &SubtreePath<B>,
transaction: TransactionArg,
) -> CostResult<Vec<Vec<Vec<u8>>>, Error> {
let mut cost = OperationCost::default();

// TODO: remove conversion to vec;
// However, it's not easy for a reason:
// new keys to enqueue are taken from raw iterator which returns Vec<u8>;
// changing that to slice is hard as cursor should be moved for next iteration
// which requires exclusive (&mut) reference, also there is no guarantee that
// slice which points into storage internals will remain valid if raw
// iterator got altered, which is why that reference should be exclusive;
//
// Update: there are pinned views into RocksDB to return slices of data, perhaps
// there is something for iterators

// `queue` holds subtree paths still to be scanned; `result` starts as a copy of
// it, so the starting path is always part of the output.
let mut queue: Vec<Vec<Vec<u8>>> = vec![path.to_vec()];
let mut result: Vec<Vec<Vec<u8>>> = queue.clone();

// `Vec::pop` takes from the back, so the most recently discovered subtree is
// scanned next.
while let Some(q) = queue.pop() {
let subtree_path: SubtreePath<Vec<u8>> = q.as_slice().into();
// Open the storage context of the subtree located at path `q`
storage_context_optional_tx!(self.db, subtree_path, None, transaction, storage, {
let storage = storage.unwrap_add_cost(&mut cost);
let mut raw_iter = Element::iterator(storage.raw_iter()).unwrap_add_cost(&mut cost);
while let Some((key, value)) =
cost_return_on_error!(&mut cost, raw_iter.next_element())
{
// Only tree elements are recorded and scheduled for their own scan.
if value.is_tree() {
let mut sub_path = q.clone();
sub_path.push(key.to_vec());
queue.push(sub_path.clone());
result.push(sub_path);
}
}
})
}
Ok(result).wrap_with_cost(cost)
}
}
58 changes: 3 additions & 55 deletions grovedb/src/operations/delete/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ use grovedb_storage::{
#[cfg(feature = "full")]
use crate::{
batch::{GroveDbOp, Op},
util::{storage_context_optional_tx, storage_context_with_parent_optional_tx},
util::storage_context_with_parent_optional_tx,
Element, ElementFlags, Error, GroveDb, Transaction, TransactionArg,
};
use crate::{raw_decode, util::merk_optional_tx_path_not_empty};
Expand Down Expand Up @@ -879,52 +879,6 @@ impl GroveDb {

Ok(true).wrap_with_cost(cost)
}

// TODO: dumb traversal should not be tolerated
/// Recursively collects the paths of all subtrees found under `path`.
///
/// The returned list starts with `path` itself. Every further entry is the
/// path of an already visited subtree with the key of one of its tree
/// elements appended, so each entry begins with the segments of `path`.
///
/// `transaction` is forwarded to `storage_context_optional_tx!` when opening
/// each subtree's storage context. Costs gathered while opening contexts and
/// iterating elements are accumulated and attached to the returned result.
///
/// NOTE(review): a failure from `next_element()` is presumably returned early
/// (with the cost so far) by `cost_return_on_error!`; confirm against the
/// macro definition.
pub(crate) fn find_subtrees<B: AsRef<[u8]>>(
&self,
path: &SubtreePath<B>,
transaction: TransactionArg,
) -> CostResult<Vec<Vec<Vec<u8>>>, Error> {
let mut cost = OperationCost::default();

// TODO: remove conversion to vec;
// However, it's not easy for a reason:
// new keys to enqueue are taken from raw iterator which returns Vec<u8>;
// changing that to slice is hard as cursor should be moved for next iteration
// which requires exclusive (&mut) reference, also there is no guarantee that
// slice which points into storage internals will remain valid if raw
// iterator got altered, which is why that reference should be exclusive;
//
// Update: there are pinned views into RocksDB to return slices of data, perhaps
// there is something for iterators

// `queue` holds subtree paths still to be scanned; `result` starts as a copy of
// it, so the starting path is always part of the output.
let mut queue: Vec<Vec<Vec<u8>>> = vec![path.to_vec()];
let mut result: Vec<Vec<Vec<u8>>> = queue.clone();

// `Vec::pop` takes from the back, so the most recently discovered subtree is
// scanned next.
while let Some(q) = queue.pop() {
let subtree_path: SubtreePath<Vec<u8>> = q.as_slice().into();
// Open the storage context of the subtree located at path `q`
storage_context_optional_tx!(self.db, subtree_path, None, transaction, storage, {
let storage = storage.unwrap_add_cost(&mut cost);
let mut raw_iter = Element::iterator(storage.raw_iter()).unwrap_add_cost(&mut cost);
while let Some((key, value)) =
cost_return_on_error!(&mut cost, raw_iter.next_element())
{
// Only tree elements are recorded and scheduled for their own scan.
if value.is_tree() {
let mut sub_path = q.clone();
sub_path.push(key.to_vec());
queue.push(sub_path.clone());
result.push(sub_path);
}
}
})
}
Ok(result).wrap_with_cost(cost)
}
}

#[cfg(feature = "full")]
Expand Down Expand Up @@ -1029,10 +983,7 @@ mod tests {
db.get([TEST_LEAF].as_ref(), b"key1", None).unwrap(),
Err(Error::PathKeyNotFound(_))
));
assert!(matches!(
db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap(),
Ok(_)
));
assert!(db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap().is_ok());
}

#[test]
Expand Down Expand Up @@ -1397,10 +1348,7 @@ mod tests {
db.get([TEST_LEAF].as_ref(), b"key1", None).unwrap(),
Err(Error::PathKeyNotFound(_))
));
assert!(matches!(
db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap(),
Ok(_)
));
assert!(db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap().is_ok());
}

#[test]
Expand Down
Loading
Loading