Skip to content

Commit

Permalink
refactor: remove expensive clones from verify_consistency_of_operatio…
Browse files Browse the repository at this point in the history
…ns; convert API to slice

There was never any need for this function to take a `&Vec`; we should never do that. Instead, it now takes a slice (`&[GroveDbOp]`).

Additionally, we refactored some of the loop logic to use .take instead of a check on each iteration to see if we were at the end. We also combined two duplicate for loops into 1.

Finally, we significantly refactored the return type to avoid a lot of very expensive cloning. Previously, we were doing something on the order of 9 clones (of expensive underlying objects, including vectors) per input GroveDbOp. All of these have been removed. We instead return references to these values rather than clones of them. This does mean that the lifetime of the return value is tied to the underlying data the slice refers to; but especially as I cannot even find an instance where we use these return values, this feels like a very fair trade-off.
  • Loading branch information
PastaPastaPasta committed Mar 18, 2024
1 parent d007bed commit 2ea41a3
Showing 1 changed file with 49 additions and 55 deletions.
104 changes: 49 additions & 55 deletions grovedb/src/batch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,95 +552,87 @@ impl GroveDbOp {
}

/// Verify consistency of operations
pub fn verify_consistency_of_operations(ops: &Vec<GroveDbOp>) -> GroveDbOpConsistencyResults {
let ops_len = ops.len();
pub fn verify_consistency_of_operations(ops: &[GroveDbOp]) -> GroveDbOpConsistencyResults {
// operations should not have any duplicates
let mut repeated_ops = vec![];
for (i, op) in ops.iter().enumerate() {
if i == ops_len {
continue;
} // Don't do last one
let mut same_path_key_ops = vec![];
// Exclude the last item
for (i, op) in ops.iter().take(ops.len() - 1).enumerate() {
let count = ops
.split_at(i + 1)
.1
.iter()
.filter(|&current_op| current_op == op)
.count() as u16;
if count > 1 {
repeated_ops.push((op.clone(), count));
repeated_ops.push((op, count));
}
}

let mut same_path_key_ops = vec![];

// No double insert or delete of same key in same path
for (i, op) in ops.iter().enumerate() {
if i == ops_len {
continue;
} // Don't do last one
// No double insert or delete of same key in same path
let mut doubled_ops = ops
.split_at(i + 1)
.1
.iter()
.filter_map(|current_op| {
if current_op.path == op.path && current_op.key == op.key {
Some(current_op.op.clone())
Some(&current_op.op)
} else {
None
}
})
.collect::<Vec<Op>>();
.collect::<Vec<&Op>>();
if !doubled_ops.is_empty() {
doubled_ops.push(op.op.clone());
same_path_key_ops.push((op.path.clone(), op.key.clone(), doubled_ops));
doubled_ops.push(&op.op);
same_path_key_ops.push((&op.path, &op.key, doubled_ops));
}
}

let inserts = ops
.iter()
.filter_map(|current_op| match current_op.op {
Op::Insert { .. } | Op::Replace { .. } => Some(current_op.clone()),
Op::Insert { .. } | Op::Replace { .. } => Some(current_op),
_ => None,
})

Check warning on line 595 in grovedb/src/batch/mod.rs

View workflow job for this annotation

GitHub Actions / clippy

this `.filter_map` can be written more simply using `.filter`

warning: this `.filter_map` can be written more simply using `.filter` --> grovedb/src/batch/mod.rs:590:23 | 590 | let inserts = ops | _______________________^ 591 | | .iter() 592 | | .filter_map(|current_op| match current_op.op { 593 | | Op::Insert { .. } | Op::Replace { .. } => Some(current_op), 594 | | _ => None, 595 | | }) | |______________^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#unnecessary_filter_map = note: `#[warn(clippy::unnecessary_filter_map)]` on by default
.collect::<Vec<GroveDbOp>>();
.collect::<Vec<&GroveDbOp>>();

let deletes = ops
.iter()
.filter_map(|current_op| {
if let Op::Delete = current_op.op {
Some(current_op.clone())
Some(current_op)
} else {
None
}
})

Check warning on line 606 in grovedb/src/batch/mod.rs

View workflow job for this annotation

GitHub Actions / clippy

this `.filter_map` can be written more simply using `.filter`

warning: this `.filter_map` can be written more simply using `.filter` --> grovedb/src/batch/mod.rs:598:23 | 598 | let deletes = ops | _______________________^ 599 | | .iter() 600 | | .filter_map(|current_op| { 601 | | if let Op::Delete = current_op.op { ... | 605 | | } 606 | | }) | |______________^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#unnecessary_filter_map
.collect::<Vec<GroveDbOp>>();

let mut insert_ops_below_deleted_ops = vec![];
.collect::<Vec<&GroveDbOp>>();

// No inserts under a deleted path
for deleted_op in deletes.iter() {
let mut deleted_qualified_path = deleted_op.path.clone();
deleted_qualified_path.push(deleted_op.key.clone());
let inserts_with_deleted_ops_above = inserts
.iter()
.filter_map(|inserted_op| {
if deleted_op.path.len() < inserted_op.path.len()
&& deleted_qualified_path
.iterator()
.zip(inserted_op.path.iterator())
.all(|(a, b)| a == b)
{
Some(inserted_op.clone())
} else {
None
}
})
.collect::<Vec<GroveDbOp>>();
if !inserts_with_deleted_ops_above.is_empty() {
insert_ops_below_deleted_ops
.push((deleted_op.clone(), inserts_with_deleted_ops_above));
}
}
let insert_ops_below_deleted_ops = deletes
.iter()
.filter_map(|&deleted_op| {
let mut deleted_qualified_path = deleted_op.path.clone();
deleted_qualified_path.push(deleted_op.key.clone());
let inserts_with_deleted_ops_above = inserts
.iter()
.filter_map(|&inserted_op| {
if deleted_op.path.len() < inserted_op.path.len()
&& deleted_qualified_path
.iterator()
.zip(inserted_op.path.iterator())
.all(|(a, b)| a == b)
{
Some(inserted_op)
} else {
None
}
})
.collect::<Vec<&GroveDbOp>>();
if !inserts_with_deleted_ops_above.is_empty() {
Some((deleted_op, inserts_with_deleted_ops_above))
} else {
None
}
})
.collect::<Vec<(&GroveDbOp, Vec<&GroveDbOp>)>>();

GroveDbOpConsistencyResults {
repeated_ops,
Expand All @@ -652,14 +644,16 @@ impl GroveDbOp {

/// Results of a consistency check on an operation batch
#[derive(Debug)]
pub struct GroveDbOpConsistencyResults {
repeated_ops: Vec<(GroveDbOp, u16)>, // the u16 is count
same_path_key_ops: Vec<(KeyInfoPath, KeyInfo, Vec<Op>)>,
insert_ops_below_deleted_ops: Vec<(GroveDbOp, Vec<GroveDbOp>)>, /* the deleted op first,
* then inserts under */
pub struct GroveDbOpConsistencyResults<'a> {
repeated_ops: Vec<(&'a GroveDbOp, u16)>, // the u16 is count
same_path_key_ops: Vec<(&'a KeyInfoPath, &'a KeyInfo, Vec<&'a Op>)>,
insert_ops_below_deleted_ops: Vec<(&'a GroveDbOp, Vec<&'a GroveDbOp>)>, /* the deleted op
* first,
* then inserts
* under */
}

impl GroveDbOpConsistencyResults {
impl GroveDbOpConsistencyResults<'_> {
/// Check if results are empty
pub fn is_empty(&self) -> bool {
self.repeated_ops.is_empty()
Expand Down

0 comments on commit 2ea41a3

Please sign in to comment.