Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/build/optimize-memory' into buil…
Browse files Browse the repository at this point in the history
…d/optimize-memory
  • Loading branch information
lklimek committed Nov 6, 2024
2 parents eafa040 + 98f0af6 commit 49ee181
Show file tree
Hide file tree
Showing 16 changed files with 335 additions and 75 deletions.
26 changes: 25 additions & 1 deletion packages/rs-drive-abci/src/abci/handler/finalize_block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::execution::types::block_execution_context::v0::BlockExecutionContextV
use crate::platform_types::cleaned_abci_messages::finalized_block_cleaned_request::v0::FinalizeBlockCleanedRequest;
use crate::platform_types::platform_state::v0::PlatformStateV0Methods;
use crate::rpc::core::CoreRPCLike;
use dpp::dashcore::Network;
use std::sync::atomic::Ordering;
use tenderdash_abci::proto::abci as proto;

Expand Down Expand Up @@ -66,7 +67,30 @@ where
));
}

app.commit_transaction(platform_version)?;
let result = app.commit_transaction(platform_version);

// We had a sequence of errors on the mainnet, starting at block 32326.
// We got RocksDB's "transaction is busy" error because of a bug (https://github.com/dashpay/platform/pull/2309).
// Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966),
// validators just proceeded to the next block, partially committing the state and updating the cache.
// Full nodes got stuck and proceeded only after a re-sync.
// For the mainnet chain, we enable these fixes at the block where we consider the state to be consistent.
let config = &app.platform().config;

if app.platform().config.network == Network::Dash
&& config.abci.chain_id == "evo1"
&& block_height < 33000
{
// Old behavior on mainnet below block 33000
result?;
} else {
// If the transaction commit failed, we still have caches in memory that
// correspond to the data that we weren't able to commit.
// The simplified solution is to restart Drive, so that all caches
// are restored from the disk, and then try to process this block again.
// TODO: We need better handling of the "transaction is busy" error, with retry logic.
result.expect("commit transaction");
}

app.platform()
.committed_block_height_guard
Expand Down
52 changes: 46 additions & 6 deletions packages/rs-drive-abci/src/abci/handler/info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::abci::AbciError;
use crate::error::Error;
use crate::platform_types::platform_state::v0::PlatformStateV0Methods;
use crate::rpc::core::CoreRPCLike;
use dpp::dashcore::Network;
use dpp::version::DESIRED_PLATFORM_VERSION;
use tenderdash_abci::proto::abci as proto;

Expand All @@ -21,28 +22,67 @@ where

let platform_state = app.platform().state.load();

let state_app_hash = platform_state
let last_block_height = platform_state.last_committed_block_height() as i64;

// Verify that the Platform State corresponds to the state committed to Drive
let platform_state_app_hash = platform_state
.last_committed_block_app_hash()
.map(|app_hash| app_hash.to_vec())
.unwrap_or_default();

let grove_version = &platform_state
.current_platform_version()?
.drive
.grove_version;

let drive_storage_root_hash = app
.platform()
.drive
.grove
.root_hash(None, grove_version)
.unwrap()?;

// We had a sequence of errors on the mainnet, starting at block 32326.
// We got RocksDB's "transaction is busy" error because of a bug (https://github.com/dashpay/platform/pull/2309).
// Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966),
// validators just proceeded to the next block, partially committing the state and updating the cache.
// Full nodes got stuck and proceeded only after a re-sync.
// For the mainnet chain, we enable these fixes at the block where we consider the state to be consistent.
let config = &app.platform().config;

#[allow(clippy::collapsible_if)]
if !(config.network == Network::Dash
&& config.abci.chain_id == "evo1"
&& last_block_height < 33000)
{
// App hash in memory must be equal to app hash on disk
if drive_storage_root_hash != platform_state_app_hash {
// We panic because we can't recover from this situation.
// It is better to restart Drive, so the node might self-heal
// by reloading the state from the disk
panic!(
"drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}",
drive_storage_root_hash, platform_state_app_hash
);
}
}

let desired_protocol_version = DESIRED_PLATFORM_VERSION.protocol_version;

let response = proto::ResponseInfo {
data: "".to_string(),
app_version: desired_protocol_version as u64,
last_block_height: platform_state.last_committed_block_height() as i64,
last_block_height,
version: env!("CARGO_PKG_VERSION").to_string(),
last_block_app_hash: state_app_hash.clone(),
last_block_app_hash: platform_state_app_hash.to_vec(),
};

tracing::debug!(
desired_protocol_version,
software_version = env!("CARGO_PKG_VERSION"),
block_version = request.block_version,
p2p_version = request.p2p_version,
app_hash = hex::encode(state_app_hash),
height = platform_state.last_committed_block_height(),
app_hash = hex::encode(platform_state_app_hash),
last_block_height,
"Handshake with consensus engine",
);

Expand Down
43 changes: 43 additions & 0 deletions packages/rs-drive-abci/src/abci/handler/prepare_proposal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::platform_types::platform_state::v0::PlatformStateV0Methods;
use crate::platform_types::state_transitions_processing_result::StateTransitionExecutionResult;
use crate::rpc::core::CoreRPCLike;
use dpp::dashcore::hashes::Hash;
use dpp::dashcore::Network;
use dpp::version::TryIntoPlatformVersioned;
use drive::grovedb_storage::Error::RocksDBError;
use tenderdash_abci::proto::abci as proto;
Expand All @@ -35,6 +36,48 @@ where

let platform_state = app.platform().state.load();

// Verify that the Platform State corresponds to the state committed to Drive
let platform_state_app_hash = platform_state
.last_committed_block_app_hash()
.unwrap_or_default();

let grove_version = &platform_state
.current_platform_version()?
.drive
.grove_version;

let drive_storage_root_hash = app
.platform()
.drive
.grove
.root_hash(None, grove_version)
.unwrap()?;

// We had a sequence of errors on the mainnet, starting at block 32326.
// We got RocksDB's "transaction is busy" error because of a bug (https://github.com/dashpay/platform/pull/2309).
// Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966),
// validators just proceeded to the next block, partially committing the state and updating the cache.
// Full nodes got stuck and proceeded only after a re-sync.
// For the mainnet chain, we enable these fixes at the block where we consider the state to be consistent.
let config = &app.platform().config;

#[allow(clippy::collapsible_if)]
if !(config.network == Network::Dash
&& config.abci.chain_id == "evo1"
&& request.height < 33000)
{
// App hash in memory must be equal to app hash on disk
if drive_storage_root_hash != platform_state_app_hash {
// We panic because we can't recover from this situation.
// It is better to restart Drive, so the node might self-heal
// by reloading the state from the disk
panic!(
"drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}",
drive_storage_root_hash, platform_state_app_hash
);
}
}

let last_committed_core_height = platform_state.last_committed_core_height();

let starting_platform_version = platform_state.current_platform_version()?;
Expand Down
43 changes: 43 additions & 0 deletions packages/rs-drive-abci/src/abci/handler/process_proposal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::platform_types::block_execution_outcome;
use crate::platform_types::platform_state::v0::PlatformStateV0Methods;
use crate::platform_types::state_transitions_processing_result::StateTransitionExecutionResult;
use crate::rpc::core::CoreRPCLike;
use dpp::dashcore::Network;
use dpp::version::TryIntoPlatformVersioned;
use drive::grovedb_storage::Error::RocksDBError;
use tenderdash_abci::proto::abci as proto;
Expand Down Expand Up @@ -179,6 +180,48 @@ where

let platform_state = app.platform().state.load();

// Verify that the Platform State corresponds to the state committed to Drive
let platform_state_app_hash = platform_state
.last_committed_block_app_hash()
.unwrap_or_default();

let grove_version = &platform_state
.current_platform_version()?
.drive
.grove_version;

let drive_storage_root_hash = app
.platform()
.drive
.grove
.root_hash(None, grove_version)
.unwrap()?;

// We had a sequence of errors on the mainnet, starting at block 32326.
// We got RocksDB's "transaction is busy" error because of a bug (https://github.com/dashpay/platform/pull/2309).
// Due to another bug in Tenderdash (https://github.com/dashpay/tenderdash/pull/966),
// validators just proceeded to the next block, partially committing the state and updating the cache.
// Full nodes got stuck and proceeded only after a re-sync.
// For the mainnet chain, we enable these fixes at the block where we consider the state to be consistent.
let config = &app.platform().config;

#[allow(clippy::collapsible_if)]
if !(app.platform().config.network == Network::Dash
&& config.abci.chain_id == "evo1"
&& request.height < 33000)
{
// App hash in memory must be equal to app hash on disk
if drive_storage_root_hash != platform_state_app_hash {
// We panic because we can't recover from this situation.
// It is better to restart Drive, so the node might self-heal
// by reloading the state from the disk
panic!(
"drive and platform state app hash mismatch: drive_storage_root_hash: {:?}, platform_state_app_hash: {:?}",
drive_storage_root_hash, platform_state_app_hash
);
}
}

let starting_platform_version = platform_state.current_platform_version()?;

// Running the proposal executes all the state transitions for the block
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::error::Error;
use crate::platform_types::platform::Platform;
use crate::platform_types::platform_state::PlatformState;
use crate::rpc::core::CoreRPCLike;
use dpp::block::block_info::BlockInfo;
use dpp::version::PlatformVersion;
use drive::grovedb::TransactionArg;

Expand All @@ -14,6 +15,7 @@ where
/// Removes the votes for removed masternodes
pub(in crate::execution) fn remove_votes_for_removed_masternodes(
&self,
block_info: &BlockInfo,
last_committed_platform_state: &PlatformState,
block_platform_state: &PlatformState,
transaction: TransactionArg,
Expand All @@ -26,6 +28,7 @@ where
.remove_votes_for_removed_masternodes
{
0 => self.remove_votes_for_removed_masternodes_v0(
block_info,
last_committed_platform_state,
block_platform_state,
transaction,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::platform_types::platform::Platform;
use crate::platform_types::platform_state::v0::PlatformStateV0Methods;
use crate::platform_types::platform_state::PlatformState;
use crate::rpc::core::CoreRPCLike;
use dpp::block::block_info::BlockInfo;
use dpp::dashcore::hashes::Hash;
use dpp::version::PlatformVersion;
use drive::grovedb::TransactionArg;
Expand All @@ -14,6 +15,7 @@ where
/// Removes the votes for removed masternodes
pub(super) fn remove_votes_for_removed_masternodes_v0(
&self,
block_info: &BlockInfo,
last_committed_platform_state: &PlatformState,
block_platform_state: &PlatformState,
transaction: TransactionArg,
Expand All @@ -29,6 +31,9 @@ where
.iter()
.map(|pro_tx_hash| pro_tx_hash.as_byte_array().to_vec())
.collect(),
block_info.height,
self.config.network,
self.config.abci.chain_id.as_str(),
transaction,
platform_version,
)?;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ where
// Remove any votes that belong to masternodes that have been removed

self.remove_votes_for_removed_masternodes(
block_info,
last_committed_platform_state,
block_platform_state,
transaction,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11287,6 +11287,7 @@ mod tests {

platform
.remove_votes_for_removed_masternodes(
&BlockInfo::default(),
&platform_state_before_masternode_identity_removals,
&block_platform_state,
Some(&transaction),
Expand Down
7 changes: 5 additions & 2 deletions packages/rs-drive-abci/tests/strategy_tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2602,7 +2602,10 @@ mod tests {
&simple_signer,
&mut rng,
platform_version,
);
)
.into_iter()
.map(|(identity, transition)| (identity, Some(transition)))
.collect();

let strategy = NetworkStrategy {
strategy: Strategy {
Expand Down Expand Up @@ -3910,7 +3913,7 @@ mod tests {
strategy: Strategy {
start_contracts: vec![],
operations: vec![Operation {
op_type: OperationType::IdentityTransfer,
op_type: OperationType::IdentityTransfer(None),
frequency: Frequency {
times_per_block_range: 1..3,
chance_per_block: None,
Expand Down
16 changes: 13 additions & 3 deletions packages/rs-drive-abci/tests/strategy_tests/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ use drive_abci::rpc::core::MockCoreRPCLike;
use rand::prelude::{IteratorRandom, SliceRandom, StdRng};
use rand::Rng;
use strategy_tests::Strategy;
use strategy_tests::transitions::{create_state_transitions_for_identities, create_state_transitions_for_identities_and_proofs, instant_asset_lock_proof_fixture, instant_asset_lock_proof_fixture_with_dynamic_range};
use strategy_tests::transitions::{create_state_transitions_for_identities, create_state_transitions_for_identities_and_proofs, instant_asset_lock_proof_fixture_with_dynamic_range};
use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::ops::RangeInclusive;
Expand Down Expand Up @@ -404,8 +404,18 @@ impl NetworkStrategy {
);
state_transitions.append(&mut new_transitions);
}
// Extend the state transitions with the strategy's hard-coded start identities,
// filtering out the ones that have no create transition
if !self.strategy.start_identities.hard_coded.is_empty() {
state_transitions.extend(self.strategy.start_identities.hard_coded.clone());
state_transitions.extend(
self.strategy.start_identities.hard_coded.iter().filter_map(
|(identity, transition)| {
transition.as_ref().map(|create_transition| {
(identity.clone(), create_transition.clone())
})
},
),
);
}
}
let frequency = &self.strategy.identity_inserts.frequency;
Expand Down Expand Up @@ -1196,7 +1206,7 @@ impl NetworkStrategy {
operations.push(state_transition);
}
}
OperationType::IdentityTransfer if current_identities.len() > 1 => {
OperationType::IdentityTransfer(_) if current_identities.len() > 1 => {
let identities_clone = current_identities.clone();

// Sender is the first in the list, which should be loaded_identity
Expand Down
Loading

0 comments on commit 49ee181

Please sign in to comment.