Skip to content

Commit

Permalink
fix(consensus)!: improve evidence, optimisations, bug fixes (#1141)
Browse files Browse the repository at this point in the history
Description
---
- consensus: simplify and optimise (size and compute) evidence struct
- consensus: suspend leader failure while processing a proposal 
- consensus: optimise transaction prepare
- consensus: propose foreign proposals before proposing transaction
stage relating to foreign proposal
- consensus: remove unjustified blocks
- consensus: park block containing foreign proposals for missing
transactions
- consensus: add timer logs
- consensus: track and persist no votes for debugging purposes
- consensus: correct canonical command ordering in blocks
- remove message logging due to poor performance
- webui: show correct block status
- swarm: reduce setup time for many VNs
- consensus_tests: refactor to test unversioned input resolution
- tariswap_test_bench fixes
- log message size

Motivation and Context
---
Many consensus bug fixes and improvements.

Removed message logging (replaced with NopLogger) for now as observed
message queuing taking seconds. We can re-add and optimise it for async
writes perhaps by sending messages on an unbounded channel and writing
everything in the channel in a single sqlite transaction per batch.

How Has This Been Tested?
---
Existing tests, manually 20 VNs 2 shard groups

What process can a PR reviewer use to test or verify this change?
---
Run tariswap bench

Breaking Changes
---

- [ ] None
- [x] Requires data directory to be deleted
- [ ] Other - Please specify
  • Loading branch information
sdbondi authored Sep 17, 2024
1 parent f80d338 commit 695d703
Show file tree
Hide file tree
Showing 130 changed files with 3,845 additions and 2,215 deletions.
43 changes: 21 additions & 22 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ httpmock = "0.6.8"
humantime = "2.1.0"
humantime-serde = "1.1.1"
include_dir = "0.7.2"
indexmap = "2.2.6"
indexmap = "2.5.0"
indoc = "1.0.6"
itertools = "0.11.0"
lazy_static = "1.4.0"
Expand Down
1 change: 0 additions & 1 deletion applications/tari_dan_app_utilities/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ libp2p-identity = { workspace = true }
log = { workspace = true, features = ["std"] }
mini-moka = { workspace = true }
multiaddr = { workspace = true }
indexmap = { workspace = true }
std-semaphore = { workspace = true }
prost = { workspace = true }
rand = { workspace = true }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

use std::sync::Arc;

use indexmap::IndexMap;
use log::*;
use tari_common::configuration::Network;
use tari_common_types::types::PublicKey;
Expand Down Expand Up @@ -46,13 +45,13 @@ pub struct ExecutionOutput {
}

impl ExecutionOutput {
pub fn resolve_inputs(
pub fn resolve_inputs<'a, I: IntoIterator<Item = (&'a SubstateRequirement, &'a Substate)>>(
&self,
inputs: &IndexMap<SubstateRequirement, Substate>,
inputs: I,
) -> Vec<VersionedSubstateIdLockIntent> {
if let Some(diff) = self.result.finalize.accept() {
inputs
.iter()
.into_iter()
.map(|(substate_req, substate)| {
let requested_specific_version = substate_req.version().is_some();
let lock_flag = if diff.down_iter().any(|(id, _)| id == substate_req.substate_id()) {
Expand All @@ -73,7 +72,7 @@ impl ExecutionOutput {
// TODO: we might want to have a SubstateLockFlag::None for rejected transactions so that we still know the
// shards involved but do not lock them. We dont actually lock anything for rejected transactions anyway.
inputs
.iter()
.into_iter()
.map(|(substate_req, substate)| {
VersionedSubstateIdLockIntent::new(
VersionedSubstateId::new(substate_req.substate_id().clone(), substate.version()),
Expand Down
14 changes: 14 additions & 0 deletions applications/tari_swarm_daemon/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ impl InstanceConfig {
self
}

#[allow(dead_code)]
pub fn with_setting<K: Into<String>, V: ToString>(mut self, key: K, value: V) -> Self {
self.settings.insert(key.into(), value.to_string());
self
Expand All @@ -135,6 +136,19 @@ pub enum InstanceType {
TariSignalingServer,
}

impl InstanceType {
pub fn is_base_layer_node(self) -> bool {
matches!(self, InstanceType::MinoTariNode | InstanceType::MinoTariConsoleWallet)
}

pub fn is_tari_node(self) -> bool {
matches!(
self,
InstanceType::TariValidatorNode | InstanceType::TariIndexer | InstanceType::TariWalletDaemon
)
}
}

impl Display for InstanceType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(self, f)
Expand Down
4 changes: 0 additions & 4 deletions applications/tari_swarm_daemon/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,6 @@ fn get_base_config(cli: &Cli) -> anyhow::Result<Config> {
InstanceConfig::new(InstanceType::MinoTariConsoleWallet)
.with_name("Minotari Wallet")
.with_num_instances(1),
// Let's mine 10 blocks on startup by default.
InstanceConfig::new(InstanceType::MinoTariMiner)
.with_name("Minotari Miner")
.with_setting("max_blocks", "10"),
InstanceConfig::new(InstanceType::TariValidatorNode)
.with_name("Validator node")
.with_num_instances(1),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright 2024 The Tari Project
// SPDX-License-Identifier: BSD-3-Clause

use std::{collections::HashMap, path::PathBuf};
use std::{collections::HashMap, path::PathBuf, process::ExitStatus};

use tokio::process::Child;

Expand All @@ -17,7 +17,7 @@ pub struct Instance {
allocated_ports: AllocatedPorts,
base_path: PathBuf,
settings: HashMap<String, String>,
is_running: bool,
exit_status: Option<ExitStatus>,
}

impl Instance {
Expand All @@ -38,7 +38,7 @@ impl Instance {
allocated_ports,
base_path,
settings,
is_running: true,
exit_status: None,
}
}

Expand Down Expand Up @@ -75,22 +75,22 @@ impl Instance {
}

pub fn is_running(&self) -> bool {
self.is_running
self.exit_status.is_none()
}

pub fn check_running(&mut self) -> bool {
if !self.is_running {
return false;
pub fn check_running(&mut self) -> anyhow::Result<Option<ExitStatus>> {
if let Some(status) = self.exit_status {
return Ok(Some(status));
}

// try_wait returns none if not exited
let is_running = self.child_mut().try_wait().map(|v| v.is_none()).unwrap_or(false);
self.is_running = is_running;
is_running
let status = self.child_mut().try_wait()?;
self.exit_status = status;
Ok(status)
}

pub async fn terminate(&mut self) -> anyhow::Result<()> {
if !self.is_running {
if !self.is_running() {
return Ok(());
}

Expand All @@ -99,7 +99,6 @@ impl Instance {
#[cfg(target_family = "windows")]
self.terminate_win().await?;

self.is_running = false;
Ok(())
}

Expand All @@ -115,14 +114,16 @@ impl Instance {

let pid = Pid::from_raw(pid as i32);
kill(pid, Signal::SIGINT)?;
self.child_mut().wait().await?;
let status = self.child_mut().wait().await?;
self.exit_status = Some(status);
Ok(())
}

#[cfg(target_family = "windows")]
async fn terminate_win(&mut self) -> anyhow::Result<()> {
// Should probably also implement a clean exit
self.child_mut().kill().await?;
self.exit_status = Some(ExitStatus::default());
Ok(())
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -178,15 +178,10 @@ impl InstanceManager {
settings,
);

// Check if the instance is still running after 2 seconds (except miner *cough*)
if matches!(instance_type, InstanceType::MinoTariMiner) {
// Update the miners is_running status
instance.check_running();
} else {
// Wait for base layer nodes to start
if instance_type.is_base_layer_node() {
sleep(Duration::from_secs(2)).await;
if !instance.check_running() {
return Err(anyhow!("Failed to start instance {instance_id} {instance_type}"));
}
instance.check_running().context("Failed to start instance")?;
}

log::info!(
Expand Down Expand Up @@ -242,6 +237,10 @@ impl InstanceManager {
self.validator_nodes.values()
}

pub fn num_instances(&self) -> usize {
self.instances().count()
}

pub fn num_validator_nodes(&self) -> u64 {
self.validator_nodes.len() as u64
}
Expand Down
Loading

0 comments on commit 695d703

Please sign in to comment.