Skip to content

Commit

Permalink
feat: db state sync by merk chunking (#292)
Browse files Browse the repository at this point in the history
* wip

    finalize multi chunk with limit

    implement chunk op

    test chunk op encoding

    convert chunk op chunk id to string

    implement traversal instruction to string

    add chunking error + devoid multi subtree chunk from encoding work

    multi-subtree-chunk skeleton + return encoding length in multichunk

    make chunks fixed size height 2

    test height proof

    implement height proof verifier

    update documentation

    verify height proof generation

    add documentation

    test no of chunk under chunk id fn

    implement number of chunks under chunk id function

    extract chunk layer function from chunk height

    separate number_of_chunk into height and layer_height functions

    return multi chunk result

    enforce limit without storage overhead

    add test for encoding length check

    implement iterator for chunk producer

    remove cost from chunks

    fix the error type

    implement random chunk access

    fixes

    implement chunk height function

    add traverse then build chunk function to ref walker

    update comment

    implement chunk producer length

    init chunk producer struct

    implement merk tree height function

    update traversal generation instruction

    add instruction traversal test

    fix documentation

    implement binary range function

    clean up number of chunks function

    given a subtree of a given height return the exit node count

    documentation fixes

    implement chunk_height_per_layer

    verify that chunks produce expected root hash

    implement and test variable depth chunk creation

    restart chunking v2

* Squashed commit of the following:

    remove bad test

    rename files

    update documentation

    wip

    wip

    implement merk verifier + state building

    implement replication from multichunk

    fix chunk verification

    fixed implementation of chunkid from traversal instructions

    fix some tests

    make chunk_id from traversal instruction test resistant to changes in underlying chunking scheme

    add restoration logic test function

    returning the next chunk id when you call chunk

    use strings as communication interface between producer and restorer

    implement chunk id from traversal instruction

    add traversal instruction generation to direct string

    chunk producer returns next index as string for multi chunk

    clean up rewrite parent links

    restoration done successfully

    rough implementation of rewrite parent

    implement function to extract sum from node type

    wip

    chunk write logic + restorer finalization + parent key tracking

    new visit ref function that keeps track of traversal path

    implement instruction string to traversal instruction

    test child to link functionality for basic and sum merks

    implement node to link include sum

    wip

    implement and test chunk verification

    Fix layer iter function

    Previous implementation made a key assumption that nodes are unique,
    including hash nodes. This made the layer iteration functionality
    depend on the contents of the tree, which shouldn't be the case.

    This adds a simpler implementation of the layer iter logic using breadth
    first search.

    add test to ensure chunks only contain hash and kvfeaturetype

    test for avl tree during proof op execution

    remove chunk_height_per_layer_lin_comb every chunk now has fixed height of 2

* wip

* wip

* wip

* rename job

* clippy fixes

* feat: base state sync

* dynamic chunk id calculation

* more work

* more work

* more work

* fix: help with lifetimes

* more work

* final work

* cargo fmt

* more fmt

* clippy fixes

* more fmt

* fix for verify feature

* more fmt

* test fixes

* more fmt

* refactor

* refactor

* refactor

* more refactoring

* more refactoring

* fmt

* suggestions

---------

Co-authored-by: Wisdom Ogwu <[email protected]>
  • Loading branch information
ogabrielides and iammadab authored May 1, 2024
1 parent d9292aa commit 7267fcf
Show file tree
Hide file tree
Showing 30 changed files with 5,055 additions and 2,401 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ members = [
"node-grove",
"storage",
"visualize",
"path",
"path"
]
2 changes: 2 additions & 0 deletions grovedb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ nohash-hasher = { version = "0.2.0", optional = true }
indexmap = { version = "2.2.6", optional = true }
intmap = { version = "2.0.0", optional = true }
grovedb-path = { version = "1.0.0-rc.2", path = "../path" }
blake3 = "1.4.0"
bitvec = "1"

[dev-dependencies]
rand = "0.8.5"
Expand Down
15 changes: 7 additions & 8 deletions grovedb/src/batch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ impl GroveDbOp {
}

/// Verify consistency of operations
pub fn verify_consistency_of_operations(ops: &Vec<GroveDbOp>) -> GroveDbOpConsistencyResults {
pub fn verify_consistency_of_operations(ops: &[GroveDbOp]) -> GroveDbOpConsistencyResults {
let ops_len = ops.len();
// operations should not have any duplicates
let mut repeated_ops = vec![];
Expand Down Expand Up @@ -2424,8 +2424,8 @@ mod tests {
Element::empty_tree(),
),
];
assert!(matches!(
db.apply_batch(
assert!(db
.apply_batch(
ops,
Some(BatchApplyOptions {
validate_insertion_does_not_override: false,
Expand All @@ -2438,9 +2438,8 @@ mod tests {
}),
None
)
.unwrap(),
Ok(_)
));
.unwrap()
.is_ok());
}

#[test]
Expand Down Expand Up @@ -3481,7 +3480,7 @@ mod tests {
elem.clone(),
),
];
assert!(matches!(db.apply_batch(batch, None, None).unwrap(), Ok(_)));
assert!(db.apply_batch(batch, None, None).unwrap().is_ok());
assert_eq!(
db.get([TEST_LEAF].as_ref(), b"key1", None)
.unwrap()
Expand All @@ -3498,7 +3497,7 @@ mod tests {
.unwrap()
.expect("should generate proof");
let verification_result = GroveDb::verify_query_raw(&proof, &path_query);
assert!(matches!(verification_result, Ok(_)));
assert!(verification_result.is_ok());

// Hit reference limit when you specify max reference hop, lower than actual hop
// count
Expand Down
12 changes: 5 additions & 7 deletions grovedb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ pub mod query_result_type;
#[cfg(any(feature = "full", feature = "verify"))]
pub mod reference_path;
#[cfg(feature = "full")]
mod replication;
pub mod replication;
#[cfg(all(test, feature = "full"))]
mod tests;
#[cfg(feature = "full")]
Expand All @@ -172,8 +172,6 @@ mod visualize;
#[cfg(feature = "full")]
use std::{collections::HashMap, option::Option::None, path::Path};

#[cfg(any(feature = "full", feature = "verify"))]
use element::helpers;
#[cfg(any(feature = "full", feature = "verify"))]
pub use element::Element;
#[cfg(feature = "full")]
Expand Down Expand Up @@ -217,14 +215,12 @@ use grovedb_storage::{Storage, StorageContext};
use grovedb_visualize::DebugByteVectors;
#[cfg(any(feature = "full", feature = "verify"))]
pub use query::{PathQuery, SizedQuery};
#[cfg(feature = "full")]
pub use replication::{BufferedRestorer, Restorer, SiblingsChunkProducer, SubtreeChunkProducer};

#[cfg(feature = "full")]
use crate::element::helpers::raw_decode;
#[cfg(any(feature = "full", feature = "verify"))]
pub use crate::error::Error;
#[cfg(feature = "full")]
use crate::helpers::raw_decode;
#[cfg(feature = "full")]
use crate::util::{root_merk_optional_tx, storage_context_optional_tx};
use crate::Error::MerkError;

Expand All @@ -237,6 +233,8 @@ pub struct GroveDb {
db: RocksDbStorage,
}

pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN];

/// Transaction
#[cfg(feature = "full")]
pub type Transaction<'db> = <RocksDbStorage as Storage<'db>>::Transaction;
Expand Down
54 changes: 51 additions & 3 deletions grovedb/src/operations/auxiliary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,17 @@
#[cfg(feature = "full")]
use grovedb_costs::{
cost_return_on_error_no_add, storage_cost::key_value_cost::KeyValueStorageCost, CostResult,
CostsExt, OperationCost,
cost_return_on_error, cost_return_on_error_no_add,
storage_cost::key_value_cost::KeyValueStorageCost, CostResult, CostsExt, OperationCost,
};
use grovedb_path::SubtreePath;
#[cfg(feature = "full")]
use grovedb_storage::StorageContext;
use grovedb_storage::{Storage, StorageBatch};

use crate::util::storage_context_optional_tx;
#[cfg(feature = "full")]
use crate::{util::meta_storage_context_optional_tx, Error, GroveDb, TransactionArg};
use crate::{util::meta_storage_context_optional_tx, Element, Error, GroveDb, TransactionArg};

#[cfg(feature = "full")]
impl GroveDb {
Expand Down Expand Up @@ -118,4 +120,50 @@ impl GroveDb {
Ok(value).wrap_with_cost(cost)
})
}

// TODO: dumb traversal should not be tolerated
/// Finds keys which are trees for a given subtree recursively.
/// One element means a key of a `merk`, n > 1 elements mean relative path
/// for a deeply nested subtree.
///
/// Returns every discovered subtree as a full path (a `Vec` of key
/// segments), with the starting `path` itself as the first entry.
/// Traversal is depth-first: paths are popped from the back of the work
/// stack, so the result order is not breadth-first.
///
/// All storage seek/read costs incurred while iterating each subtree are
/// accumulated into the returned `CostResult`.
pub fn find_subtrees<B: AsRef<[u8]>>(
&self,
path: &SubtreePath<B>,
transaction: TransactionArg,
) -> CostResult<Vec<Vec<Vec<u8>>>, Error> {
// Accumulates the cost of every storage context opened and every
// element decoded during the walk below.
let mut cost = OperationCost::default();

// TODO: remove conversion to vec;
// However, it's not easy for a reason:
// new keys to enqueue are taken from raw iterator which returns Vec<u8>;
// changing that to slice is hard as cursor should be moved for next iteration
// which requires exclusive (&mut) reference, also there is no guarantee that
// slice which points into storage internals will remain valid if raw
// iterator got altered so why that reference should be exclusive;
//
// Update: there are pinned views into RocksDB to return slices of data, perhaps
// there is something for iterators

// Work stack of paths still to be scanned, seeded with the search root.
let mut queue: Vec<Vec<Vec<u8>>> = vec![path.to_vec()];
// The starting path counts as a subtree too, hence the clone of the seed.
let mut result: Vec<Vec<Vec<u8>>> = queue.clone();

while let Some(q) = queue.pop() {
let subtree_path: SubtreePath<Vec<u8>> = q.as_slice().into();
// Get the correct subtree with q_ref as path
storage_context_optional_tx!(self.db, subtree_path, None, transaction, storage, {
let storage = storage.unwrap_add_cost(&mut cost);
let mut raw_iter = Element::iterator(storage.raw_iter()).unwrap_add_cost(&mut cost);
// Iteration errors short-circuit out of the function via
// cost_return_on_error!, carrying the cost accumulated so far.
while let Some((key, value)) =
cost_return_on_error!(&mut cost, raw_iter.next_element())
{
// Only tree elements spawn further traversal; plain values
// under this subtree are skipped.
if value.is_tree() {
let mut sub_path = q.clone();
sub_path.push(key.to_vec());
queue.push(sub_path.clone());
result.push(sub_path);
}
}
})
}
Ok(result).wrap_with_cost(cost)
}
}
58 changes: 3 additions & 55 deletions grovedb/src/operations/delete/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ use grovedb_storage::{
#[cfg(feature = "full")]
use crate::{
batch::{GroveDbOp, Op},
util::{storage_context_optional_tx, storage_context_with_parent_optional_tx},
util::storage_context_with_parent_optional_tx,
Element, ElementFlags, Error, GroveDb, Transaction, TransactionArg,
};
use crate::{raw_decode, util::merk_optional_tx_path_not_empty};
Expand Down Expand Up @@ -879,52 +879,6 @@ impl GroveDb {

Ok(true).wrap_with_cost(cost)
}

// TODO: dumb traversal should not be tolerated
/// Finds keys which are trees for a given subtree recursively.
/// One element means a key of a `merk`, n > 1 elements mean relative path
/// for a deeply nested subtree.
///
/// Returns every discovered subtree as a full path (a `Vec` of key
/// segments), with the starting `path` itself as the first entry.
/// Traversal is depth-first: paths are popped from the back of the work
/// stack, so the result order is not breadth-first.
///
/// All storage seek/read costs incurred while iterating each subtree are
/// accumulated into the returned `CostResult`.
pub(crate) fn find_subtrees<B: AsRef<[u8]>>(
&self,
path: &SubtreePath<B>,
transaction: TransactionArg,
) -> CostResult<Vec<Vec<Vec<u8>>>, Error> {
// Accumulates the cost of every storage context opened and every
// element decoded during the walk below.
let mut cost = OperationCost::default();

// TODO: remove conversion to vec;
// However, it's not easy for a reason:
// new keys to enqueue are taken from raw iterator which returns Vec<u8>;
// changing that to slice is hard as cursor should be moved for next iteration
// which requires exclusive (&mut) reference, also there is no guarantee that
// slice which points into storage internals will remain valid if raw
// iterator got altered so why that reference should be exclusive;
//
// Update: there are pinned views into RocksDB to return slices of data, perhaps
// there is something for iterators

// Work stack of paths still to be scanned, seeded with the search root.
let mut queue: Vec<Vec<Vec<u8>>> = vec![path.to_vec()];
// The starting path counts as a subtree too, hence the clone of the seed.
let mut result: Vec<Vec<Vec<u8>>> = queue.clone();

while let Some(q) = queue.pop() {
let subtree_path: SubtreePath<Vec<u8>> = q.as_slice().into();
// Get the correct subtree with q_ref as path
storage_context_optional_tx!(self.db, subtree_path, None, transaction, storage, {
let storage = storage.unwrap_add_cost(&mut cost);
let mut raw_iter = Element::iterator(storage.raw_iter()).unwrap_add_cost(&mut cost);
// Iteration errors short-circuit out of the function via
// cost_return_on_error!, carrying the cost accumulated so far.
while let Some((key, value)) =
cost_return_on_error!(&mut cost, raw_iter.next_element())
{
// Only tree elements spawn further traversal; plain values
// under this subtree are skipped.
if value.is_tree() {
let mut sub_path = q.clone();
sub_path.push(key.to_vec());
queue.push(sub_path.clone());
result.push(sub_path);
}
}
})
}
Ok(result).wrap_with_cost(cost)
}
}

#[cfg(feature = "full")]
Expand Down Expand Up @@ -1029,10 +983,7 @@ mod tests {
db.get([TEST_LEAF].as_ref(), b"key1", None).unwrap(),
Err(Error::PathKeyNotFound(_))
));
assert!(matches!(
db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap(),
Ok(_)
));
assert!(db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap().is_ok());
}

#[test]
Expand Down Expand Up @@ -1397,10 +1348,7 @@ mod tests {
db.get([TEST_LEAF].as_ref(), b"key1", None).unwrap(),
Err(Error::PathKeyNotFound(_))
));
assert!(matches!(
db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap(),
Ok(_)
));
assert!(db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap().is_ok());
}

#[test]
Expand Down
Loading

0 comments on commit 7267fcf

Please sign in to comment.