From 2ccd9430a1aba4c0eea8387d10837206a2b4145a Mon Sep 17 00:00:00 2001 From: wacban Date: Mon, 9 Dec 2024 15:34:37 +0100 Subject: [PATCH 01/26] assert that buffers are empty iff buffered gas in info is zero --- runtime/runtime/src/congestion_control.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/runtime/runtime/src/congestion_control.rs b/runtime/runtime/src/congestion_control.rs index 5427f83cee1..2071305ad63 100644 --- a/runtime/runtime/src/congestion_control.rs +++ b/runtime/runtime/src/congestion_control.rs @@ -258,17 +258,27 @@ impl ReceiptSinkV2 { parent_shard_ids.intersection(&shard_ids.clone().into_iter().collect()).count() == 0 ); + let mut all_buffers_empty = true; + // First forward any receipts that may still be in the outgoing buffers // of the parent shards. for &shard_id in &parent_shard_ids { self.forward_from_buffer_to_shard(shard_id, state_update, apply_state, &shard_layout)?; + let is_buffer_empty = self.outgoing_buffers.to_shard(shard_id).len() == 0; + all_buffers_empty &= is_buffer_empty; } // Then forward receipts from the outgoing buffers of the shard in the // current shard layout. for &shard_id in &shard_ids { self.forward_from_buffer_to_shard(shard_id, state_update, apply_state, &shard_layout)?; + let is_buffer_empty = self.outgoing_buffers.to_shard(shard_id).len() == 0; + all_buffers_empty &= is_buffer_empty; } + + // Assert that empty buffers match zero buffered gas. + assert_eq!(all_buffers_empty, self.own_congestion_info.buffered_receipts_gas() == 0); + Ok(()) } From bb57b0167182aeb32ff29b986c65be5a4f33a320 Mon Sep 17 00:00:00 2001 From: wacban Date: Mon, 9 Dec 2024 17:37:16 +0100 Subject: [PATCH 02/26] brute force congestion info --- chain/chain/src/resharding/manager.rs | 26 +++++++++++++++++++++-- core/primitives/src/types.rs | 10 +++++++++ runtime/runtime/src/congestion_control.rs | 6 ++++++ runtime/runtime/src/lib.rs | 9 ++------ 4 files changed, 42 insertions(+), 9 deletions(-) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index 5d3bd24e3fd..13dce262295 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -19,10 +19,12 @@ use near_store::trie::mem::mem_trie_update::TrackingMode; use near_store::trie::ops::resharding::RetainMode; use near_store::trie::TrieRecorder; use near_store::{DBCol, ShardTries, ShardUId, Store}; +use node_runtime::bootstrap_congestion_info; pub struct ReshardingManager { store: Store, epoch_manager: Arc, + runtime_adapter: Arc, /// Configuration for resharding. pub resharding_config: MutableConfigValue, /// A handle that allows the main process to interrupt resharding if needed. @@ -42,12 +44,19 @@ impl ReshardingManager { ) -> Self { let resharding_handle = ReshardingHandle::new(); let flat_storage_resharder = FlatStorageResharder::new( - runtime_adapter, + runtime_adapter.clone(), resharding_sender, FlatStorageResharderController::from_resharding_handle(resharding_handle.clone()), resharding_config.clone(), ); - Self { store, epoch_manager, resharding_config, flat_storage_resharder, resharding_handle } + Self { + store, + epoch_manager, + runtime_adapter, + resharding_config, + flat_storage_resharder, + resharding_handle, + } } /// Trigger resharding if shard layout changes after the given block. @@ -213,11 +222,24 @@ impl ReshardingManager { }; let mem_changes = trie_changes.mem_trie_changes.as_ref().unwrap(); let new_state_root = mem_tries.apply_memtrie_changes(block_height, mem_changes); + drop(mem_tries); + // TODO(resharding): set all fields of `ChunkExtra`. Consider stronger // typing. Clarify where it should happen when `State` and // `FlatState` update is implemented. let mut child_chunk_extra = ChunkExtra::clone(&chunk_extra); *child_chunk_extra.state_root_mut() = new_state_root; + // TODO(resharding) - Implement proper congestion info for + // resharding. The current implementation is very expensive. + if let Some(congestion_info) = child_chunk_extra.congestion_info_mut() { + let epoch_id = block.header().epoch_id(); + let protocol_version = self.epoch_manager.get_epoch_protocol_version(epoch_id)?; + + let trie = tries.get_trie_for_shard(new_shard_uid, new_state_root); + let config = self.runtime_adapter.get_runtime_config(protocol_version)?; + let new_shard_id = new_shard_uid.shard_id(); + *congestion_info = bootstrap_congestion_info(&trie, &config, new_shard_id)?; + } chain_store_update.save_chunk_extra(block_hash, &new_shard_uid, child_chunk_extra); chain_store_update.save_state_transition_data( diff --git a/core/primitives/src/types.rs b/core/primitives/src/types.rs index c636452b91d..a143fbdea7c 100644 --- a/core/primitives/src/types.rs +++ b/core/primitives/src/types.rs @@ -957,6 +957,16 @@ pub mod chunk_extra { } } + #[inline] + pub fn congestion_info_mut(&mut self) -> Option<&mut CongestionInfo> { + match self { + Self::V1(_) => None, + Self::V2(_) => None, + Self::V3(v3) => Some(&mut v3.congestion_info), + Self::V4(v4) => Some(&mut v4.congestion_info), + } + } + #[inline] pub fn bandwidth_requests(&self) -> Option<&BandwidthRequests> { match self { diff --git a/runtime/runtime/src/congestion_control.rs b/runtime/runtime/src/congestion_control.rs index 2071305ad63..7f2691e2ad4 100644 --- a/runtime/runtime/src/congestion_control.rs +++ b/runtime/runtime/src/congestion_control.rs @@ -635,6 +635,9 @@ pub fn bootstrap_congestion_info( config: &RuntimeConfig, shard_id: ShardId, ) -> Result { + tracing::warn!(target: "runtime", "starting to bootstrap congestion info, this might take a while"); + let start = std::time::Instant::now(); + let mut receipt_bytes: u64 = 0; let mut delayed_receipts_gas: u128 = 0; let mut buffered_receipts_gas: u128 = 0; @@ -663,6 +666,9 @@ pub fn bootstrap_congestion_info( } } + let time = start.elapsed(); + tracing::warn!(target: "runtime","bootstrapping congestion info done after {time:#.1?}"); + Ok(CongestionInfo::V1(CongestionInfoV1 { delayed_receipts_gas: delayed_receipts_gas as u128, buffered_receipts_gas: buffered_receipts_gas as u128, diff --git a/runtime/runtime/src/lib.rs b/runtime/runtime/src/lib.rs index 1ca1a43b3de..cfbb34df768 100644 --- a/runtime/runtime/src/lib.rs +++ b/runtime/runtime/src/lib.rs @@ -2230,13 +2230,8 @@ impl ApplyState { return Ok(Some(congestion_info.congestion_info)); } - tracing::warn!(target: "runtime", "starting to bootstrap congestion info, this might take a while"); - let start = std::time::Instant::now(); - let result = bootstrap_congestion_info(trie, &self.config, self.shard_id); - let time = start.elapsed(); - tracing::warn!(target: "runtime","bootstrapping congestion info done after {time:#.1?}"); - let computed = result?; - Ok(Some(computed)) + let congestion_info = bootstrap_congestion_info(trie, &self.config, self.shard_id)?; + Ok(Some(congestion_info)) } } From ffe30d9037b8991ad38a3d61e93b85f9a0533770 Mon Sep 17 00:00:00 2001 From: wacban Date: Wed, 11 Dec 2024 20:12:37 +0100 Subject: [PATCH 03/26] debug logs --- Justfile | 2 +- core/o11y/src/testonly.rs | 2 +- core/primitives-core/src/hash.rs | 3 ++- integration-tests/src/test_loop/tests/resharding_v3.rs | 2 ++ 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Justfile b/Justfile index 91dfadfa9d2..f8614e41392 100644 --- a/Justfile +++ b/Justfile @@ -2,7 +2,7 @@ # them at earliest convenience :) # Also in addition to this, the `nextest-integration` test is currently disabled on macos platform_excludes := if os() == "macos" { - "--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests" + "" } else if os() == "windows" { "--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests" } else { diff --git a/core/o11y/src/testonly.rs b/core/o11y/src/testonly.rs index e8bffd8a5a8..bda6b69cf4d 100644 --- a/core/o11y/src/testonly.rs +++ b/core/o11y/src/testonly.rs @@ -23,7 +23,7 @@ fn setup_subscriber_from_filter(mut env_filter: EnvFilter) { let _ = fmt::Subscriber::builder() .with_ansi(use_color_auto()) - .with_span_events(fmt::format::FmtSpan::CLOSE) + .with_span_events(fmt::format::FmtSpan::NONE) .with_env_filter(env_filter) .with_writer(fmt::TestWriter::new()) .with_timer(TestUptime::default()) diff --git a/core/primitives-core/src/hash.rs b/core/primitives-core/src/hash.rs index 194facb1aa6..08730bdc717 100644 --- a/core/primitives-core/src/hash.rs +++ b/core/primitives-core/src/hash.rs @@ -209,7 +209,8 @@ impl fmt::Debug for CryptoHash { impl fmt::Display for CryptoHash { fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result { - self.to_base58_impl(|encoded| fmtr.write_str(encoded)) + // TODO remove me debugging only + self.to_base58_impl(|encoded| fmtr.write_str(&encoded[..4])) } } diff --git a/integration-tests/src/test_loop/tests/resharding_v3.rs b/integration-tests/src/test_loop/tests/resharding_v3.rs index 2f4e2045c87..bfd519be992 100644 --- a/integration-tests/src/test_loop/tests/resharding_v3.rs +++ b/integration-tests/src/test_loop/tests/resharding_v3.rs @@ -757,6 +757,8 @@ fn test_resharding_v3_base(params: TestReshardingParameters) { if params.all_chunks_expected && params.chunk_ranges_to_drop.is_empty() { assert!(block_header.chunk_mask().iter().all(|chunk_bit| *chunk_bit)); } + + tracing::info!(target: "test", epoch_id=?tip.epoch_id, height=?tip.height, "new block"); } // Return true if we passed an epoch with increased number of shards. From 9d04209cabfa3a66c2236ed8125f58c9abf2605a Mon Sep 17 00:00:00 2001 From: wacban Date: Thu, 12 Dec 2024 12:23:58 +0100 Subject: [PATCH 04/26] brute force works --- chain/chain/src/resharding/manager.rs | 17 ++++++++++++ .../stateless_validation/chunk_validation.rs | 26 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index 13dce262295..187dbc498e4 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -6,6 +6,7 @@ use super::types::ReshardingSender; use crate::flat_storage_resharder::{FlatStorageResharder, FlatStorageResharderController}; use crate::types::RuntimeAdapter; use crate::ChainStoreUpdate; +use itertools::Itertools; use near_chain_configs::{MutableConfigValue, ReshardingConfig, ReshardingHandle}; use near_chain_primitives::Error; use near_epoch_manager::EpochManagerAdapter; @@ -239,6 +240,22 @@ impl ReshardingManager { let config = self.runtime_adapter.get_runtime_config(protocol_version)?; let new_shard_id = new_shard_uid.shard_id(); *congestion_info = bootstrap_congestion_info(&trie, &config, new_shard_id)?; + + // Please note the usage of the child shard layout here. + let next_epoch_id = self.epoch_manager.get_next_epoch_id(block_hash)?; + let next_shard_layout = self.epoch_manager.get_shard_layout(&next_epoch_id)?; + let all_shards = next_shard_layout.shard_ids().collect_vec(); + let own_shard = new_shard_uid.shard_id(); + let own_shard_index = next_shard_layout + .get_shard_index(own_shard)? + .try_into() + .expect("ShardIndex must fit in u64"); + + // Use simplified congestion seed. The proper one should be + // block height + shard index, however the block heigh is not + // easily available in all required places. + let congestion_seed = own_shard_index; + congestion_info.finalize_allowed_shard(own_shard, &all_shards, congestion_seed); } chain_store_update.save_chunk_extra(block_hash, &new_shard_uid, child_chunk_extra); diff --git a/chain/chain/src/stateless_validation/chunk_validation.rs b/chain/chain/src/stateless_validation/chunk_validation.rs index 4aa7df2b98a..87d4a5f5c8b 100644 --- a/chain/chain/src/stateless_validation/chunk_validation.rs +++ b/chain/chain/src/stateless_validation/chunk_validation.rs @@ -13,6 +13,7 @@ use crate::types::{ }; use crate::validate::validate_chunk_with_chunk_extra_and_receipts_root; use crate::{Chain, ChainStore, ChainStoreAccess}; +use itertools::Itertools; use lru::LruCache; use near_async::futures::AsyncComputationSpawnerExt; use near_chain_primitives::Error; @@ -35,6 +36,7 @@ use near_primitives::types::{AccountId, ProtocolVersion, ShardId, ShardIndex}; use near_primitives::utils::compression::CompressedData; use near_store::trie::ops::resharding::RetainMode; use near_store::{PartialStorage, Trie}; +use node_runtime::bootstrap_congestion_info; use std::collections::HashMap; use std::num::NonZeroUsize; use std::sync::{Arc, Mutex}; @@ -718,6 +720,30 @@ pub fn validate_chunk_state_witness( true, ); let new_root = trie.retain_split_shard(&boundary_account, retain_mode)?; + + if let Some(congestion_info) = chunk_extra.congestion_info_mut() { + // let trie = tries.get_trie_for_shard(new_shard_uid, new_state_root); + let config = runtime_adapter.get_runtime_config(protocol_version)?; + let new_shard_id = child_shard_uid.shard_id(); + *congestion_info = bootstrap_congestion_info(&trie, &config, new_shard_id)?; + + // Please note the usage of the child shard layout here. + let next_epoch_id = epoch_manager.get_next_epoch_id(&block_hash)?; + let next_shard_layout = epoch_manager.get_shard_layout(&next_epoch_id)?; + let all_shards = next_shard_layout.shard_ids().collect_vec(); + let own_shard = new_shard_id; + let own_shard_index = next_shard_layout + .get_shard_index(own_shard)? + .try_into() + .expect("ShardIndex must fit in u64"); + + // Use simplified congestion seed. The proper one should be + // block height + shard index, however the block heigh is not + // easily available in all required places. + let congestion_seed = own_shard_index; + congestion_info.finalize_allowed_shard(own_shard, &all_shards, congestion_seed); + } + (child_shard_uid, new_root) } }; From 78b5bc84bd74711f40162d8ad94bf7888091bac8 Mon Sep 17 00:00:00 2001 From: wacban Date: Thu, 12 Dec 2024 17:37:08 +0100 Subject: [PATCH 05/26] does not work --- chain/chain/src/resharding/manager.rs | 147 ++++++++++++++++++++------ core/store/src/trie/ops/resharding.rs | 2 +- 2 files changed, 117 insertions(+), 32 deletions(-) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index 187dbc498e4..e39c928f0f2 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -12,12 +12,14 @@ use near_chain_primitives::Error; use near_epoch_manager::EpochManagerAdapter; use near_primitives::block::Block; use near_primitives::challenge::PartialState; +use near_primitives::congestion_info::CongestionInfo; use near_primitives::hash::CryptoHash; use near_primitives::shard_layout::{get_block_shard_uid, ShardLayout}; use near_primitives::types::chunk_extra::ChunkExtra; use near_store::adapter::{StoreAdapter, StoreUpdateAdapter}; use near_store::trie::mem::mem_trie_update::TrackingMode; use near_store::trie::ops::resharding::RetainMode; +use near_store::trie::outgoing_metadata::ReceiptGroupsQueue; use near_store::trie::TrieRecorder; use near_store::{DBCol, ShardTries, ShardUId, Store}; use node_runtime::bootstrap_congestion_info; @@ -187,7 +189,7 @@ impl ReshardingManager { // blocks, the second finalization will crash. tries.freeze_mem_tries(parent_shard_uid, split_shard_event.children_shards())?; - let chunk_extra = self.get_chunk_extra(block_hash, &parent_shard_uid)?; + let parent_chunk_extra = self.get_chunk_extra(block_hash, &parent_shard_uid)?; let boundary_account = split_shard_event.boundary_account; let mut trie_store_update = self.store.store_update(); @@ -211,10 +213,11 @@ impl ReshardingManager { target: "resharding", ?new_shard_uid, ?retain_mode, "Creating child memtrie by retaining nodes in parent memtrie..." ); + let mut mem_tries = mem_tries.write().unwrap(); let mut trie_recorder = TrieRecorder::new(); let mode = TrackingMode::RefcountsAndAccesses(&mut trie_recorder); - let mem_trie_update = mem_tries.update(*chunk_extra.state_root(), mode)?; + let mem_trie_update = mem_tries.update(*parent_chunk_extra.state_root(), mode)?; let trie_changes = mem_trie_update.retain_split_shard(&boundary_account, retain_mode); let partial_storage = trie_recorder.recorded_storage(); @@ -228,35 +231,15 @@ impl ReshardingManager { // TODO(resharding): set all fields of `ChunkExtra`. Consider stronger // typing. Clarify where it should happen when `State` and // `FlatState` update is implemented. - let mut child_chunk_extra = ChunkExtra::clone(&chunk_extra); - *child_chunk_extra.state_root_mut() = new_state_root; - // TODO(resharding) - Implement proper congestion info for - // resharding. The current implementation is very expensive. - if let Some(congestion_info) = child_chunk_extra.congestion_info_mut() { - let epoch_id = block.header().epoch_id(); - let protocol_version = self.epoch_manager.get_epoch_protocol_version(epoch_id)?; - - let trie = tries.get_trie_for_shard(new_shard_uid, new_state_root); - let config = self.runtime_adapter.get_runtime_config(protocol_version)?; - let new_shard_id = new_shard_uid.shard_id(); - *congestion_info = bootstrap_congestion_info(&trie, &config, new_shard_id)?; - - // Please note the usage of the child shard layout here. - let next_epoch_id = self.epoch_manager.get_next_epoch_id(block_hash)?; - let next_shard_layout = self.epoch_manager.get_shard_layout(&next_epoch_id)?; - let all_shards = next_shard_layout.shard_ids().collect_vec(); - let own_shard = new_shard_uid.shard_id(); - let own_shard_index = next_shard_layout - .get_shard_index(own_shard)? - .try_into() - .expect("ShardIndex must fit in u64"); - - // Use simplified congestion seed. The proper one should be - // block height + shard index, however the block heigh is not - // easily available in all required places. - let congestion_seed = own_shard_index; - congestion_info.finalize_allowed_shard(own_shard, &all_shards, congestion_seed); - } + let child_chunk_extra = self.get_child_chunk_extra( + block, + &tries, + &parent_chunk_extra, + new_state_root, + new_shard_uid, + parent_shard_uid, + retain_mode, + )?; chain_store_update.save_chunk_extra(block_hash, &new_shard_uid, child_chunk_extra); chain_store_update.save_state_transition_data( @@ -289,6 +272,108 @@ impl ReshardingManager { Ok(()) } + fn get_child_chunk_extra( + &mut self, + block: &Block, + tries: &ShardTries, + parent_chunk_extra: &Arc, + new_state_root: CryptoHash, + new_shard_uid: ShardUId, + parent_shard_uid: ShardUId, + retain_mode: RetainMode, + ) -> Result { + let mut child_chunk_extra = ChunkExtra::clone(parent_chunk_extra); + *child_chunk_extra.state_root_mut() = new_state_root; + + if let Some(congestion_info) = child_chunk_extra.congestion_info_mut() { + let &parent_state_root = parent_chunk_extra.state_root(); + *congestion_info = self.get_child_congestion_info( + block, + tries, + parent_shard_uid, + parent_state_root, + new_shard_uid, + new_state_root, + retain_mode, + &congestion_info, + )?; + + // Please note the usage of the child shard layout here. + let next_epoch_id = self.epoch_manager.get_next_epoch_id(block.hash())?; + let next_shard_layout = self.epoch_manager.get_shard_layout(&next_epoch_id)?; + let all_shards = next_shard_layout.shard_ids().collect_vec(); + let own_shard = new_shard_uid.shard_id(); + let own_shard_index = next_shard_layout + .get_shard_index(own_shard)? + .try_into() + .expect("ShardIndex must fit in u64"); + + // Use simplified congestion seed. The proper one should be + // block height + shard index, however the block heigh is not + // easily available in all required places. + let congestion_seed = own_shard_index; + congestion_info.finalize_allowed_shard(own_shard, &all_shards, congestion_seed); + } + Ok(child_chunk_extra) + } + + fn get_child_congestion_info( + &mut self, + block: &Block, + tries: &ShardTries, + parent_shard_uid: ShardUId, + parent_state_root: CryptoHash, + new_shard_uid: ShardUId, + new_state_root: CryptoHash, + retain_mode: RetainMode, + congestion_info: &CongestionInfo, + ) -> Result { + if retain_mode == RetainMode::Left { + return Ok(congestion_info.clone()); + } + + // left child -> unchanged + // right child -> remove the buffered receipts + let epoch_id = block.header().epoch_id(); + let shard_layout = self.epoch_manager.get_shard_layout(&epoch_id)?; + let protocol_version = self.epoch_manager.get_epoch_protocol_version(epoch_id)?; + + let trie = tries.get_trie_for_shard(parent_shard_uid, parent_state_root); + let mut smart_congestion_info = congestion_info.clone(); + for shard_id in shard_layout.shard_ids() { + let receipt_groups = ReceiptGroupsQueue::load(&trie, shard_id)?; + let Some(receipt_groups) = receipt_groups else { + tracing::info!(target: "boom", ?shard_id, "no receipt group found!"); + continue; + }; + + let bytes = receipt_groups.total_size(); + let gas = receipt_groups.total_gas(); + let gas = gas.try_into().expect("ReceiptGroup gas must fit in u64"); + + tracing::info!(target: "boom", ?shard_id, ?bytes, ?gas, "new receipt group found!"); + + smart_congestion_info + .remove_buffered_receipt_gas(gas) + .expect("Buffered gas must not exceed congestion info buffered gas"); + smart_congestion_info + .remove_receipt_bytes(bytes) + .expect("Buffered size must not exceed congestion info buffered size"); + } + + assert_eq!(smart_congestion_info.buffered_receipts_gas(), 0); + + let trie = tries.get_trie_for_shard(new_shard_uid, new_state_root); + let config = self.runtime_adapter.get_runtime_config(protocol_version)?; + let new_shard_id = new_shard_uid.shard_id(); + let congestion_info = bootstrap_congestion_info(&trie, &config, new_shard_id)?; + + smart_congestion_info.set_allowed_shard(congestion_info.allowed_shard()); + assert_eq!(congestion_info, smart_congestion_info); + + Ok(congestion_info) + } + // TODO(store): Use proper store interface fn get_chunk_extra( &self, diff --git a/core/store/src/trie/ops/resharding.rs b/core/store/src/trie/ops/resharding.rs index e7a78ddd4ae..05c0ed42665 100644 --- a/core/store/src/trie/ops/resharding.rs +++ b/core/store/src/trie/ops/resharding.rs @@ -14,7 +14,7 @@ use super::interface::{ }; use super::squash::GenericTrieUpdateSquash; -#[derive(Debug)] +#[derive(Debug, Clone, Copy, Eq, PartialEq)] /// Whether to retain left or right part of trie after shard split. pub enum RetainMode { Left, From f5ddc63890429ceeaf527426bd12281aa07a5da3 Mon Sep 17 00:00:00 2001 From: wacban Date: Fri, 13 Dec 2024 12:33:11 +0100 Subject: [PATCH 06/26] protocol version order, bandwidth scheduler state resharding --- chain/chain/src/flat_storage_resharder.rs | 5 ++++- core/primitives-core/src/version.rs | 14 ++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 83bd481f3fa..2b7db1e3e78 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -997,7 +997,10 @@ fn shard_split_handle_key_value( | col::BANDWIDTH_SCHEDULER_STATE => { copy_kv_to_all_children(&split_params, key, value, store_update) } - col::BUFFERED_RECEIPT_INDICES | col::BUFFERED_RECEIPT => { + col::BUFFERED_RECEIPT_INDICES + | col::BUFFERED_RECEIPT + | col::BUFFERED_RECEIPT_GROUPS_QUEUE_DATA + | col::BUFFERED_RECEIPT_GROUPS_QUEUE_ITEM => { copy_kv_to_left_child(&split_params, key, value, store_update) } _ => unreachable!("key: {:?} should not appear in flat store!", key), diff --git a/core/primitives-core/src/version.rs b/core/primitives-core/src/version.rs index cb8e9b4e6fa..921f9409a56 100644 --- a/core/primitives-core/src/version.rs +++ b/core/primitives-core/src/version.rs @@ -268,12 +268,18 @@ impl ProtocolFeature { // TODO(#11201): When stabilizing this feature in mainnet, also remove the temporary code // that always enables this for mocknet (see config_mocknet function). ProtocolFeature::ShuffleShardAssignments => 143, + // CurrentEpochStateSync must be enabled before ReshardingV3! When + // releasing this feature please make sure to schedule separate + // protocol upgrades for those features! ProtocolFeature::CurrentEpochStateSync => 144, - ProtocolFeature::SimpleNightshadeV4 => 145, + // BandwidthScheduler must be enabled before ReshardingV3! When + // releasing this feature please make sure to schedule separate + // protocol upgrades for those features! + ProtocolFeature::BandwidthScheduler => 145, + ProtocolFeature::SimpleNightshadeV4 => 146, #[cfg(feature = "protocol_feature_relaxed_chunk_validation")] - ProtocolFeature::RelaxedChunkValidation => 146, - ProtocolFeature::ExcludeExistingCodeFromWitnessForCodeLen => 147, - ProtocolFeature::BandwidthScheduler => 148, + ProtocolFeature::RelaxedChunkValidation => 147, + ProtocolFeature::ExcludeExistingCodeFromWitnessForCodeLen => 148, ProtocolFeature::BlockHeightForReceiptId => 149, // Place features that are not yet in Nightly below this line. } From 419e804665c73c0eb4f791742908b8353698c346 Mon Sep 17 00:00:00 2001 From: wacban Date: Fri, 13 Dec 2024 12:45:00 +0100 Subject: [PATCH 07/26] allow negative refcount --- integration-tests/src/test_loop/tests/resharding_v3.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/integration-tests/src/test_loop/tests/resharding_v3.rs b/integration-tests/src/test_loop/tests/resharding_v3.rs index bfd519be992..c8c34f47ed7 100644 --- a/integration-tests/src/test_loop/tests/resharding_v3.rs +++ b/integration-tests/src/test_loop/tests/resharding_v3.rs @@ -968,6 +968,7 @@ fn test_resharding_v3_split_parent_buffered_receipts_base(base_shard_layout_vers let account_in_right_child: AccountId = "account6".parse().unwrap(); let params = TestReshardingParametersBuilder::default() .base_shard_layout_version(base_shard_layout_version) + .allow_negative_refcount() .deploy_test_contract(receiver_account.clone()) .limit_outgoing_gas(true) .add_loop_action(call_burn_gas_contract( From 8b73a77a636108c394416bce495e7b35066f8cc2 Mon Sep 17 00:00:00 2001 From: wacban Date: Fri, 13 Dec 2024 13:43:38 +0100 Subject: [PATCH 08/26] todos --- core/primitives-core/src/version.rs | 1 + integration-tests/src/test_loop/tests/resharding_v3.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/core/primitives-core/src/version.rs b/core/primitives-core/src/version.rs index 921f9409a56..76f587ef63c 100644 --- a/core/primitives-core/src/version.rs +++ b/core/primitives-core/src/version.rs @@ -275,6 +275,7 @@ impl ProtocolFeature { // BandwidthScheduler must be enabled before ReshardingV3! When // releasing this feature please make sure to schedule separate // protocol upgrades for those features! + // TODO ask longarithm to update the forknet instructions ProtocolFeature::BandwidthScheduler => 145, ProtocolFeature::SimpleNightshadeV4 => 146, #[cfg(feature = "protocol_feature_relaxed_chunk_validation")] diff --git a/integration-tests/src/test_loop/tests/resharding_v3.rs b/integration-tests/src/test_loop/tests/resharding_v3.rs index c8c34f47ed7..c2a7a58ce7a 100644 --- a/integration-tests/src/test_loop/tests/resharding_v3.rs +++ b/integration-tests/src/test_loop/tests/resharding_v3.rs @@ -189,7 +189,7 @@ impl TestReshardingParametersBuilder { limit_outgoing_gas: self.limit_outgoing_gas.unwrap_or(false), delay_flat_state_resharding: self.delay_flat_state_resharding.unwrap_or(0), short_yield_timeout: self.short_yield_timeout.unwrap_or(false), - allow_negative_refcount: self.allow_negative_refcount.unwrap_or(false), + allow_negative_refcount: self.allow_negative_refcount.unwrap_or(true), } } From 0457ee3e48c229983ca65fd1ed5c079f24c4838f Mon Sep 17 00:00:00 2001 From: wacban Date: Fri, 13 Dec 2024 13:44:26 +0100 Subject: [PATCH 09/26] cleanup --- Justfile | 2 +- core/o11y/src/testonly.rs | 2 +- core/primitives-core/src/hash.rs | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Justfile b/Justfile index f8614e41392..91dfadfa9d2 100644 --- a/Justfile +++ b/Justfile @@ -2,7 +2,7 @@ # them at earliest convenience :) # Also in addition to this, the `nextest-integration` test is currently disabled on macos platform_excludes := if os() == "macos" { - "" + "--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests" } else if os() == "windows" { "--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests" } else { diff --git a/core/o11y/src/testonly.rs b/core/o11y/src/testonly.rs index bda6b69cf4d..e8bffd8a5a8 100644 --- a/core/o11y/src/testonly.rs +++ b/core/o11y/src/testonly.rs @@ -23,7 +23,7 @@ fn setup_subscriber_from_filter(mut env_filter: EnvFilter) { let _ = fmt::Subscriber::builder() .with_ansi(use_color_auto()) - .with_span_events(fmt::format::FmtSpan::NONE) + .with_span_events(fmt::format::FmtSpan::CLOSE) .with_env_filter(env_filter) .with_writer(fmt::TestWriter::new()) .with_timer(TestUptime::default()) diff --git a/core/primitives-core/src/hash.rs b/core/primitives-core/src/hash.rs index 08730bdc717..194facb1aa6 100644 --- a/core/primitives-core/src/hash.rs +++ b/core/primitives-core/src/hash.rs @@ -209,8 +209,7 @@ impl fmt::Debug for CryptoHash { impl fmt::Display for CryptoHash { fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result { - // TODO remove me debugging only - self.to_base58_impl(|encoded| fmtr.write_str(&encoded[..4])) + self.to_base58_impl(|encoded| fmtr.write_str(encoded)) } } From e432db8cb70a0b569eb3916ddcf0b4357f417038 Mon Sep 17 00:00:00 2001 From: wacban Date: Fri, 13 Dec 2024 13:50:03 +0100 Subject: [PATCH 10/26] small nits --- chain/chain/src/resharding/manager.rs | 9 +++------ core/primitives-core/src/version.rs | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index e39c928f0f2..f80c6f0e3e4 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -228,9 +228,6 @@ impl ReshardingManager { let new_state_root = mem_tries.apply_memtrie_changes(block_height, mem_changes); drop(mem_tries); - // TODO(resharding): set all fields of `ChunkExtra`. Consider stronger - // typing. Clarify where it should happen when `State` and - // `FlatState` update is implemented. let child_chunk_extra = self.get_child_chunk_extra( block, &tries, @@ -282,6 +279,9 @@ impl ReshardingManager { parent_shard_uid: ShardUId, retain_mode: RetainMode, ) -> Result { + // TODO(resharding): set all fields of `ChunkExtra`. Consider stronger + // typing. Clarify where it should happen when `State` and + // `FlatState` update is implemented. let mut child_chunk_extra = ChunkExtra::clone(parent_chunk_extra); *child_chunk_extra.state_root_mut() = new_state_root; @@ -343,7 +343,6 @@ impl ReshardingManager { for shard_id in shard_layout.shard_ids() { let receipt_groups = ReceiptGroupsQueue::load(&trie, shard_id)?; let Some(receipt_groups) = receipt_groups else { - tracing::info!(target: "boom", ?shard_id, "no receipt group found!"); continue; }; @@ -351,8 +350,6 @@ impl ReshardingManager { let gas = receipt_groups.total_gas(); let gas = gas.try_into().expect("ReceiptGroup gas must fit in u64"); - tracing::info!(target: "boom", ?shard_id, ?bytes, ?gas, "new receipt group found!"); - smart_congestion_info .remove_buffered_receipt_gas(gas) .expect("Buffered gas must not exceed congestion info buffered gas"); diff --git a/core/primitives-core/src/version.rs b/core/primitives-core/src/version.rs index 76f587ef63c..b58568a8952 100644 --- a/core/primitives-core/src/version.rs +++ b/core/primitives-core/src/version.rs @@ -348,3 +348,18 @@ macro_rules! checked_feature { } }}; } + +#[cfg(test)] +mod tests { + use super::ProtocolFeature; + + #[test] + fn test_resharding_dependencies() { + let state_sync = ProtocolFeature::CurrentEpochStateSync.protocol_version(); + let bandwidth_scheduler = ProtocolFeature::BandwidthScheduler.protocol_version(); + let resharding_v3 = ProtocolFeature::SimpleNightshadeV4.protocol_version(); + + assert!(state_sync < resharding_v3); + assert!(bandwidth_scheduler < resharding_v3); + } +} From 4eea6ad23cb84d80ed89d6cb92e2d00d7de13c97 Mon Sep 17 00:00:00 2001 From: wacban Date: Fri, 13 Dec 2024 14:37:25 +0100 Subject: [PATCH 11/26] stateless validation --- chain/chain/src/resharding/manager.rs | 115 ++++++++---------- .../stateless_validation/chunk_validation.rs | 23 +++- 2 files changed, 72 insertions(+), 66 deletions(-) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index f80c6f0e3e4..97eac43700f 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -21,13 +21,11 @@ use near_store::trie::mem::mem_trie_update::TrackingMode; use near_store::trie::ops::resharding::RetainMode; use near_store::trie::outgoing_metadata::ReceiptGroupsQueue; use near_store::trie::TrieRecorder; -use near_store::{DBCol, ShardTries, ShardUId, Store}; -use node_runtime::bootstrap_congestion_info; +use near_store::{DBCol, ShardTries, ShardUId, Store, TrieAccess}; pub struct ReshardingManager { store: Store, epoch_manager: Arc, - runtime_adapter: Arc, /// Configuration for resharding. pub resharding_config: MutableConfigValue, /// A handle that allows the main process to interrupt resharding if needed. @@ -47,19 +45,12 @@ impl ReshardingManager { ) -> Self { let resharding_handle = ReshardingHandle::new(); let flat_storage_resharder = FlatStorageResharder::new( - runtime_adapter.clone(), + runtime_adapter, resharding_sender, FlatStorageResharderController::from_resharding_handle(resharding_handle.clone()), resharding_config.clone(), ); - Self { - store, - epoch_manager, - runtime_adapter, - resharding_config, - flat_storage_resharder, - resharding_handle, - } + Self { store, epoch_manager, resharding_config, flat_storage_resharder, resharding_handle } } /// Trigger resharding if shard layout changes after the given block. @@ -286,62 +277,48 @@ impl ReshardingManager { *child_chunk_extra.state_root_mut() = new_state_root; if let Some(congestion_info) = child_chunk_extra.congestion_info_mut() { - let &parent_state_root = parent_chunk_extra.state_root(); - *congestion_info = self.get_child_congestion_info( - block, - tries, - parent_shard_uid, - parent_state_root, - new_shard_uid, - new_state_root, + // Get the congestion info based on the parent shard. + let epoch_id = block.header().epoch_id(); + let parent_shard_layout = self.epoch_manager.get_shard_layout(&epoch_id)?; + let parent_state_root = *parent_chunk_extra.state_root(); + let parent_congestion_info = congestion_info.clone(); + let trie = tries.get_trie_for_shard(parent_shard_uid, parent_state_root); + *congestion_info = Self::get_child_congestion_info( + &trie, + &parent_shard_layout, + parent_congestion_info, retain_mode, - &congestion_info, )?; - // Please note the usage of the child shard layout here. + // Set the allowed shard based on the child shard. let next_epoch_id = self.epoch_manager.get_next_epoch_id(block.hash())?; let next_shard_layout = self.epoch_manager.get_shard_layout(&next_epoch_id)?; - let all_shards = next_shard_layout.shard_ids().collect_vec(); - let own_shard = new_shard_uid.shard_id(); - let own_shard_index = next_shard_layout - .get_shard_index(own_shard)? - .try_into() - .expect("ShardIndex must fit in u64"); - - // Use simplified congestion seed. The proper one should be - // block height + shard index, however the block heigh is not - // easily available in all required places. - let congestion_seed = own_shard_index; - congestion_info.finalize_allowed_shard(own_shard, &all_shards, congestion_seed); + Self::finalize_allowed_shard(&next_shard_layout, new_shard_uid, congestion_info)?; } Ok(child_chunk_extra) } - fn get_child_congestion_info( - &mut self, - block: &Block, - tries: &ShardTries, - parent_shard_uid: ShardUId, - parent_state_root: CryptoHash, - new_shard_uid: ShardUId, - new_state_root: CryptoHash, + // Get the congestion info for the child shard. + // + // The left child contains all the delayed and buffered receipts from the + // parent so it should have identical congestion info. + // + // The right child contains all the delayed receipts from the parent but it + // has no buffered receipts. It's info needs to be computed by subtracting + // the parent's buffered receipts from the parent's congestion info. + pub fn get_child_congestion_info( + trie: &dyn TrieAccess, + parent_shard_layout: &ShardLayout, + parent_congestion_info: CongestionInfo, retain_mode: RetainMode, - congestion_info: &CongestionInfo, ) -> Result { if retain_mode == RetainMode::Left { - return Ok(congestion_info.clone()); + return Ok(parent_congestion_info); } - // left child -> unchanged - // right child -> remove the buffered receipts - let epoch_id = block.header().epoch_id(); - let shard_layout = self.epoch_manager.get_shard_layout(&epoch_id)?; - let protocol_version = self.epoch_manager.get_epoch_protocol_version(epoch_id)?; - - let trie = tries.get_trie_for_shard(parent_shard_uid, parent_state_root); - let mut smart_congestion_info = congestion_info.clone(); - for shard_id in shard_layout.shard_ids() { - let receipt_groups = ReceiptGroupsQueue::load(&trie, shard_id)?; + let mut congestion_info = parent_congestion_info; + for shard_id in parent_shard_layout.shard_ids() { + let receipt_groups = ReceiptGroupsQueue::load(trie, shard_id)?; let Some(receipt_groups) = receipt_groups else { continue; }; @@ -350,27 +327,35 @@ impl ReshardingManager { let gas = receipt_groups.total_gas(); let gas = gas.try_into().expect("ReceiptGroup gas must fit in u64"); - smart_congestion_info + congestion_info .remove_buffered_receipt_gas(gas) .expect("Buffered gas must not exceed congestion info buffered gas"); - smart_congestion_info + congestion_info .remove_receipt_bytes(bytes) .expect("Buffered size must not exceed congestion info buffered size"); } - assert_eq!(smart_congestion_info.buffered_receipts_gas(), 0); - - let trie = tries.get_trie_for_shard(new_shard_uid, new_state_root); - let config = self.runtime_adapter.get_runtime_config(protocol_version)?; - let new_shard_id = new_shard_uid.shard_id(); - let congestion_info = bootstrap_congestion_info(&trie, &config, new_shard_id)?; - - smart_congestion_info.set_allowed_shard(congestion_info.allowed_shard()); - assert_eq!(congestion_info, smart_congestion_info); + assert_eq!(congestion_info.buffered_receipts_gas(), 0); Ok(congestion_info) } + pub fn finalize_allowed_shard( + next_shard_layout: &ShardLayout, + new_shard_uid: ShardUId, + congestion_info: &mut CongestionInfo, + ) -> Result<(), Error> { + let all_shards = next_shard_layout.shard_ids().collect_vec(); + let own_shard = new_shard_uid.shard_id(); + let own_shard_index = next_shard_layout + .get_shard_index(own_shard)? + .try_into() + .expect("ShardIndex must fit in u64"); + let congestion_seed = own_shard_index; + congestion_info.finalize_allowed_shard(own_shard, &all_shards, congestion_seed); + Ok(()) + } + // TODO(store): Use proper store interface fn get_chunk_extra( &self, diff --git a/chain/chain/src/stateless_validation/chunk_validation.rs b/chain/chain/src/stateless_validation/chunk_validation.rs index 87d4a5f5c8b..ae48b9a05cf 100644 --- a/chain/chain/src/stateless_validation/chunk_validation.rs +++ b/chain/chain/src/stateless_validation/chunk_validation.rs @@ -4,6 +4,7 @@ use crate::chain::{ }; use crate::rayon_spawner::RayonAsyncComputationSpawner; use crate::resharding::event_type::ReshardingEventType; +use crate::resharding::manager::ReshardingManager; use crate::sharding::shuffle_receipt_proofs; use crate::stateless_validation::processing_tracker::ProcessingDoneTracker; use crate::store::filter_incoming_receipts_for_shard; @@ -722,7 +723,27 @@ pub fn validate_chunk_state_witness( let new_root = trie.retain_split_shard(&boundary_account, retain_mode)?; if let Some(congestion_info) = chunk_extra.congestion_info_mut() { - // let trie = tries.get_trie_for_shard(new_shard_uid, new_state_root); + // Get the congestion info based on the parent shard. + let epoch_id = epoch_manager.get_epoch_id(&block_hash)?; + let parent_shard_layout = epoch_manager.get_shard_layout(&epoch_id)?; + let parent_congestion_info = congestion_info.clone(); + *congestion_info = ReshardingManager::get_child_congestion_info( + // This is iffy - this should be trie parent trie. + &trie, + &parent_shard_layout, + parent_congestion_info, + retain_mode, + )?; + + // Set the allowed shard based on the child shard. + let next_epoch_id = epoch_manager.get_next_epoch_id(&block_hash)?; + let next_shard_layout = epoch_manager.get_shard_layout(&next_epoch_id)?; + ReshardingManager::finalize_allowed_shard( + &next_shard_layout, + child_shard_uid, + congestion_info, + )?; + let config = runtime_adapter.get_runtime_config(protocol_version)?; let new_shard_id = child_shard_uid.shard_id(); *congestion_info = bootstrap_congestion_info(&trie, &config, new_shard_id)?; From 752e8044bac19f07c45c8c8441bfed3309371ff1 Mon Sep 17 00:00:00 2001 From: wacban Date: Wed, 18 Dec 2024 14:27:04 +0100 Subject: [PATCH 12/26] nit --- .../stateless_validation/chunk_validation.rs | 23 +------------------ .../src/test_loop/tests/resharding_v3.rs | 2 +- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/chain/chain/src/stateless_validation/chunk_validation.rs b/chain/chain/src/stateless_validation/chunk_validation.rs index ae48b9a05cf..d26fe7de2e8 100644 --- a/chain/chain/src/stateless_validation/chunk_validation.rs +++ b/chain/chain/src/stateless_validation/chunk_validation.rs @@ -14,7 +14,6 @@ use crate::types::{ }; use crate::validate::validate_chunk_with_chunk_extra_and_receipts_root; use crate::{Chain, ChainStore, ChainStoreAccess}; -use itertools::Itertools; use lru::LruCache; use near_async::futures::AsyncComputationSpawnerExt; use near_chain_primitives::Error; @@ -37,7 +36,6 @@ use near_primitives::types::{AccountId, ProtocolVersion, ShardId, ShardIndex}; use near_primitives::utils::compression::CompressedData; use near_store::trie::ops::resharding::RetainMode; use near_store::{PartialStorage, Trie}; -use node_runtime::bootstrap_congestion_info; use std::collections::HashMap; use std::num::NonZeroUsize; use std::sync::{Arc, Mutex}; @@ -723,6 +721,7 @@ pub fn validate_chunk_state_witness( let new_root = trie.retain_split_shard(&boundary_account, retain_mode)?; if let Some(congestion_info) = chunk_extra.congestion_info_mut() { + tracing::info!("BOOM doing congestion info"); // Get the congestion info based on the parent shard. let epoch_id = epoch_manager.get_epoch_id(&block_hash)?; let parent_shard_layout = epoch_manager.get_shard_layout(&epoch_id)?; @@ -743,26 +742,6 @@ pub fn validate_chunk_state_witness( child_shard_uid, congestion_info, )?; - - let config = runtime_adapter.get_runtime_config(protocol_version)?; - let new_shard_id = child_shard_uid.shard_id(); - *congestion_info = bootstrap_congestion_info(&trie, &config, new_shard_id)?; - - // Please note the usage of the child shard layout here. - let next_epoch_id = epoch_manager.get_next_epoch_id(&block_hash)?; - let next_shard_layout = epoch_manager.get_shard_layout(&next_epoch_id)?; - let all_shards = next_shard_layout.shard_ids().collect_vec(); - let own_shard = new_shard_id; - let own_shard_index = next_shard_layout - .get_shard_index(own_shard)? - .try_into() - .expect("ShardIndex must fit in u64"); - - // Use simplified congestion seed. The proper one should be - // block height + shard index, however the block heigh is not - // easily available in all required places. - let congestion_seed = own_shard_index; - congestion_info.finalize_allowed_shard(own_shard, &all_shards, congestion_seed); } (child_shard_uid, new_root) diff --git a/integration-tests/src/test_loop/tests/resharding_v3.rs b/integration-tests/src/test_loop/tests/resharding_v3.rs index c2a7a58ce7a..a0153dfaf0c 100644 --- a/integration-tests/src/test_loop/tests/resharding_v3.rs +++ b/integration-tests/src/test_loop/tests/resharding_v3.rs @@ -968,7 +968,7 @@ fn test_resharding_v3_split_parent_buffered_receipts_base(base_shard_layout_vers let account_in_right_child: AccountId = "account6".parse().unwrap(); let params = TestReshardingParametersBuilder::default() .base_shard_layout_version(base_shard_layout_version) - .allow_negative_refcount() + .allow_negative_refcount(true) .deploy_test_contract(receiver_account.clone()) .limit_outgoing_gas(true) .add_loop_action(call_burn_gas_contract( From f49a309aafcf2c5b20081e0cacace682b8943eb0 Mon Sep 17 00:00:00 2001 From: wacban Date: Wed, 18 Dec 2024 14:51:57 +0100 Subject: [PATCH 13/26] cleanup --- chain/chain/src/resharding/manager.rs | 36 ++++++++++--------- .../stateless_validation/chunk_validation.rs | 17 +++++---- runtime/runtime/src/congestion_control.rs | 6 ---- runtime/runtime/src/lib.rs | 9 +++-- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index 97eac43700f..7d322c2e68e 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -204,7 +204,6 @@ impl ReshardingManager { target: "resharding", ?new_shard_uid, ?retain_mode, "Creating child memtrie by retaining nodes in parent memtrie..." ); - let mut mem_tries = mem_tries.write().unwrap(); let mut trie_recorder = TrieRecorder::new(); let mode = TrackingMode::RefcountsAndAccesses(&mut trie_recorder); @@ -222,10 +221,10 @@ impl ReshardingManager { let child_chunk_extra = self.get_child_chunk_extra( block, &tries, + parent_shard_uid, &parent_chunk_extra, - new_state_root, new_shard_uid, - parent_shard_uid, + new_state_root, retain_mode, )?; @@ -264,10 +263,10 @@ impl ReshardingManager { &mut self, block: &Block, tries: &ShardTries, + parent_shard_uid: ShardUId, parent_chunk_extra: &Arc, - new_state_root: CryptoHash, new_shard_uid: ShardUId, - parent_shard_uid: ShardUId, + new_state_root: CryptoHash, retain_mode: RetainMode, ) -> Result { // TODO(resharding): set all fields of `ChunkExtra`. Consider stronger @@ -282,9 +281,9 @@ impl ReshardingManager { let parent_shard_layout = self.epoch_manager.get_shard_layout(&epoch_id)?; let parent_state_root = *parent_chunk_extra.state_root(); let parent_congestion_info = congestion_info.clone(); - let trie = tries.get_trie_for_shard(parent_shard_uid, parent_state_root); + let parent_trie = tries.get_trie_for_shard(parent_shard_uid, parent_state_root); *congestion_info = Self::get_child_congestion_info( - &trie, + &parent_trie, &parent_shard_layout, parent_congestion_info, retain_mode, @@ -298,27 +297,28 @@ impl ReshardingManager { Ok(child_chunk_extra) } - // Get the congestion info for the child shard. - // - // The left child contains all the delayed and buffered receipts from the - // parent so it should have identical congestion info. - // - // The right child contains all the delayed receipts from the parent but it - // has no buffered receipts. It's info needs to be computed by subtracting - // the parent's buffered receipts from the parent's congestion info. + // Get the congestion info for the child shard. The congestion info can be + // inferred efficiently from the combination of the parent shard's + // congestion info and the receipt group metadata, that is available in the + // parent shard's trie. pub fn get_child_congestion_info( - trie: &dyn TrieAccess, + parent_trie: &dyn TrieAccess, parent_shard_layout: &ShardLayout, parent_congestion_info: CongestionInfo, retain_mode: RetainMode, ) -> Result { + // The left child contains all the delayed and buffered receipts from the + // parent so it should have identical congestion info. if retain_mode == RetainMode::Left { return Ok(parent_congestion_info); } + // The right child contains all the delayed receipts from the parent but it + // has no buffered receipts. It's info needs to be computed by subtracting + // the parent's buffered receipts from the parent's congestion info. let mut congestion_info = parent_congestion_info; for shard_id in parent_shard_layout.shard_ids() { - let receipt_groups = ReceiptGroupsQueue::load(trie, shard_id)?; + let receipt_groups = ReceiptGroupsQueue::load(parent_trie, shard_id)?; let Some(receipt_groups) = receipt_groups else { continue; }; @@ -335,6 +335,8 @@ impl ReshardingManager { .expect("Buffered size must not exceed congestion info buffered size"); } + // The right child does not inherit any buffered receipts. The + // congestion info must match this invariant. assert_eq!(congestion_info.buffered_receipts_gas(), 0); Ok(congestion_info) diff --git a/chain/chain/src/stateless_validation/chunk_validation.rs b/chain/chain/src/stateless_validation/chunk_validation.rs index d26fe7de2e8..d41fb1abf6c 100644 --- a/chain/chain/src/stateless_validation/chunk_validation.rs +++ b/chain/chain/src/stateless_validation/chunk_validation.rs @@ -713,22 +713,19 @@ pub fn validate_chunk_state_witness( child_shard_uid, ) => { let old_root = *chunk_extra.state_root(); - let trie = Trie::from_recorded_storage( - PartialStorage { nodes: transition.base_state }, - old_root, - true, - ); - let new_root = trie.retain_split_shard(&boundary_account, retain_mode)?; + let partial_storage = PartialStorage { nodes: transition.base_state }; + let parent_trie = Trie::from_recorded_storage(partial_storage, old_root, true); + // Update the congestion info based on the parent shard. It's + // important to do this step before the `retain_split_shard` + // because only the parent has the needed information. if let Some(congestion_info) = chunk_extra.congestion_info_mut() { - tracing::info!("BOOM doing congestion info"); // Get the congestion info based on the parent shard. let epoch_id = epoch_manager.get_epoch_id(&block_hash)?; let parent_shard_layout = epoch_manager.get_shard_layout(&epoch_id)?; let parent_congestion_info = congestion_info.clone(); *congestion_info = ReshardingManager::get_child_congestion_info( - // This is iffy - this should be trie parent trie. - &trie, + &parent_trie, &parent_shard_layout, parent_congestion_info, retain_mode, @@ -744,6 +741,8 @@ pub fn validate_chunk_state_witness( )?; } + let new_root = parent_trie.retain_split_shard(&boundary_account, retain_mode)?; + (child_shard_uid, new_root) } }; diff --git a/runtime/runtime/src/congestion_control.rs b/runtime/runtime/src/congestion_control.rs index 7f2691e2ad4..2071305ad63 100644 --- a/runtime/runtime/src/congestion_control.rs +++ b/runtime/runtime/src/congestion_control.rs @@ -635,9 +635,6 @@ pub fn bootstrap_congestion_info( config: &RuntimeConfig, shard_id: ShardId, ) -> Result { - tracing::warn!(target: "runtime", "starting to bootstrap congestion info, this might take a while"); - let start = std::time::Instant::now(); - let mut receipt_bytes: u64 = 0; let mut delayed_receipts_gas: u128 = 0; let mut buffered_receipts_gas: u128 = 0; @@ -666,9 +663,6 @@ pub fn bootstrap_congestion_info( } } - let time = start.elapsed(); - tracing::warn!(target: "runtime","bootstrapping congestion info done after {time:#.1?}"); - Ok(CongestionInfo::V1(CongestionInfoV1 { delayed_receipts_gas: delayed_receipts_gas as u128, buffered_receipts_gas: buffered_receipts_gas as u128, diff --git a/runtime/runtime/src/lib.rs b/runtime/runtime/src/lib.rs index cfbb34df768..1ca1a43b3de 100644 --- a/runtime/runtime/src/lib.rs +++ b/runtime/runtime/src/lib.rs @@ -2230,8 +2230,13 @@ impl ApplyState { return Ok(Some(congestion_info.congestion_info)); } - let congestion_info = bootstrap_congestion_info(trie, &self.config, self.shard_id)?; - Ok(Some(congestion_info)) + tracing::warn!(target: "runtime", "starting to bootstrap congestion info, this might take a while"); + let start = std::time::Instant::now(); + let result = bootstrap_congestion_info(trie, &self.config, self.shard_id); + let time = start.elapsed(); + tracing::warn!(target: "runtime","bootstrapping congestion info done after {time:#.1?}"); + let computed = result?; + Ok(Some(computed)) } } From f10e0ac90ed0dc566a5e1e5a44acd3739097ebef Mon Sep 17 00:00:00 2001 From: wacban Date: Wed, 18 Dec 2024 15:20:19 +0100 Subject: [PATCH 14/26] nits --- chain/chain/src/resharding/manager.rs | 86 ++++++++++--------- .../stateless_validation/chunk_validation.rs | 2 +- 2 files changed, 47 insertions(+), 41 deletions(-) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index 7d322c2e68e..6d077a947d8 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -218,15 +218,27 @@ impl ReshardingManager { let new_state_root = mem_tries.apply_memtrie_changes(block_height, mem_changes); drop(mem_tries); - let child_chunk_extra = self.get_child_chunk_extra( - block, + // TODO(resharding): set all fields of `ChunkExtra`. Consider stronger + // typing. Clarify where it should happen when `State` and + // `FlatState` update is implemented. + let mut child_chunk_extra = ChunkExtra::clone(&parent_chunk_extra); + *child_chunk_extra.state_root_mut() = new_state_root; + + let parent_epoch_id = block.header().epoch_id(); + let parent_shard_layout = self.epoch_manager.get_shard_layout(&parent_epoch_id)?; + let child_epoch_id = self.epoch_manager.get_next_epoch_id(block.hash())?; + let child_shard_layout = self.epoch_manager.get_shard_layout(&child_epoch_id)?; + let child_congestion_info = Self::get_child_congestion_info( &tries, + &parent_shard_layout, parent_shard_uid, &parent_chunk_extra, + &child_shard_layout, new_shard_uid, - new_state_root, retain_mode, )?; + *child_chunk_extra.congestion_info_mut().expect("The congestion info must exist!") = + child_congestion_info; chain_store_update.save_chunk_extra(block_hash, &new_shard_uid, child_chunk_extra); chain_store_update.save_state_transition_data( @@ -259,49 +271,43 @@ impl ReshardingManager { Ok(()) } - fn get_child_chunk_extra( - &mut self, - block: &Block, + fn get_child_congestion_info( tries: &ShardTries, + parent_shard_layout: &ShardLayout, parent_shard_uid: ShardUId, parent_chunk_extra: &Arc, - new_shard_uid: ShardUId, - new_state_root: CryptoHash, + child_shard_layout: &ShardLayout, + child_shard_uid: ShardUId, retain_mode: RetainMode, - ) -> Result { - // TODO(resharding): set all fields of `ChunkExtra`. Consider stronger - // typing. Clarify where it should happen when `State` and - // `FlatState` update is implemented. - let mut child_chunk_extra = ChunkExtra::clone(parent_chunk_extra); - *child_chunk_extra.state_root_mut() = new_state_root; - - if let Some(congestion_info) = child_chunk_extra.congestion_info_mut() { - // Get the congestion info based on the parent shard. - let epoch_id = block.header().epoch_id(); - let parent_shard_layout = self.epoch_manager.get_shard_layout(&epoch_id)?; - let parent_state_root = *parent_chunk_extra.state_root(); - let parent_congestion_info = congestion_info.clone(); - let parent_trie = tries.get_trie_for_shard(parent_shard_uid, parent_state_root); - *congestion_info = Self::get_child_congestion_info( - &parent_trie, - &parent_shard_layout, - parent_congestion_info, - retain_mode, - )?; + ) -> Result { + let parent_congestion_info = + parent_chunk_extra.congestion_info().expect("The congestion info must exist!"); + + // Get the congestion info based on the parent shard. + let parent_state_root = *parent_chunk_extra.state_root(); + let parent_trie = tries.get_trie_for_shard(parent_shard_uid, parent_state_root); + let mut child_congestion_info = Self::get_child_congestion_info_not_finalized( + &parent_trie, + &parent_shard_layout, + parent_congestion_info, + retain_mode, + )?; - // Set the allowed shard based on the child shard. - let next_epoch_id = self.epoch_manager.get_next_epoch_id(block.hash())?; - let next_shard_layout = self.epoch_manager.get_shard_layout(&next_epoch_id)?; - Self::finalize_allowed_shard(&next_shard_layout, new_shard_uid, congestion_info)?; - } - Ok(child_chunk_extra) + // Set the allowed shard based on the child shard. + Self::finalize_allowed_shard( + &child_shard_layout, + child_shard_uid, + &mut child_congestion_info, + )?; + + Ok(child_congestion_info) } // Get the congestion info for the child shard. The congestion info can be // inferred efficiently from the combination of the parent shard's // congestion info and the receipt group metadata, that is available in the // parent shard's trie. - pub fn get_child_congestion_info( + pub fn get_child_congestion_info_not_finalized( parent_trie: &dyn TrieAccess, parent_shard_layout: &ShardLayout, parent_congestion_info: CongestionInfo, @@ -343,13 +349,13 @@ impl ReshardingManager { } pub fn finalize_allowed_shard( - next_shard_layout: &ShardLayout, - new_shard_uid: ShardUId, + child_shard_layout: &ShardLayout, + child_shard_uid: ShardUId, congestion_info: &mut CongestionInfo, ) -> Result<(), Error> { - let all_shards = next_shard_layout.shard_ids().collect_vec(); - let own_shard = new_shard_uid.shard_id(); - let own_shard_index = next_shard_layout + let all_shards = child_shard_layout.shard_ids().collect_vec(); + let own_shard = child_shard_uid.shard_id(); + let own_shard_index = child_shard_layout .get_shard_index(own_shard)? .try_into() .expect("ShardIndex must fit in u64"); diff --git a/chain/chain/src/stateless_validation/chunk_validation.rs b/chain/chain/src/stateless_validation/chunk_validation.rs index d41fb1abf6c..35962b23915 100644 --- a/chain/chain/src/stateless_validation/chunk_validation.rs +++ b/chain/chain/src/stateless_validation/chunk_validation.rs @@ -724,7 +724,7 @@ pub fn validate_chunk_state_witness( let epoch_id = epoch_manager.get_epoch_id(&block_hash)?; let parent_shard_layout = epoch_manager.get_shard_layout(&epoch_id)?; let parent_congestion_info = congestion_info.clone(); - *congestion_info = ReshardingManager::get_child_congestion_info( + *congestion_info = ReshardingManager::get_child_congestion_info_not_finalized( &parent_trie, &parent_shard_layout, parent_congestion_info, From fbcca65903067403eee33afd7b41698446d6c4e3 Mon Sep 17 00:00:00 2001 From: wacban Date: Wed, 18 Dec 2024 15:36:13 +0100 Subject: [PATCH 15/26] nits --- chain/chain/src/resharding/manager.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index 6d077a947d8..52c32f10547 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -218,12 +218,7 @@ impl ReshardingManager { let new_state_root = mem_tries.apply_memtrie_changes(block_height, mem_changes); drop(mem_tries); - // TODO(resharding): set all fields of `ChunkExtra`. Consider stronger - // typing. Clarify where it should happen when `State` and - // `FlatState` update is implemented. - let mut child_chunk_extra = ChunkExtra::clone(&parent_chunk_extra); - *child_chunk_extra.state_root_mut() = new_state_root; - + // Get the congestion info for the child. let parent_epoch_id = block.header().epoch_id(); let parent_shard_layout = self.epoch_manager.get_shard_layout(&parent_epoch_id)?; let child_epoch_id = self.epoch_manager.get_next_epoch_id(block.hash())?; @@ -237,6 +232,12 @@ impl ReshardingManager { new_shard_uid, retain_mode, )?; + + // TODO(resharding): set all fields of `ChunkExtra`. Consider stronger + // typing. Clarify where it should happen when `State` and + // `FlatState` update is implemented. + let mut child_chunk_extra = ChunkExtra::clone(&parent_chunk_extra); + *child_chunk_extra.state_root_mut() = new_state_root; *child_chunk_extra.congestion_info_mut().expect("The congestion info must exist!") = child_congestion_info; From db568c2c57ee17b784c8fdf004559071be52e7c5 Mon Sep 17 00:00:00 2001 From: wacban Date: Thu, 19 Dec 2024 10:31:20 +0100 Subject: [PATCH 16/26] clippy --- chain/chain/src/stateless_validation/chunk_validation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain/chain/src/stateless_validation/chunk_validation.rs b/chain/chain/src/stateless_validation/chunk_validation.rs index 35962b23915..64cd614c7cf 100644 --- a/chain/chain/src/stateless_validation/chunk_validation.rs +++ b/chain/chain/src/stateless_validation/chunk_validation.rs @@ -723,7 +723,7 @@ pub fn validate_chunk_state_witness( // Get the congestion info based on the parent shard. let epoch_id = epoch_manager.get_epoch_id(&block_hash)?; let parent_shard_layout = epoch_manager.get_shard_layout(&epoch_id)?; - let parent_congestion_info = congestion_info.clone(); + let parent_congestion_info = *congestion_info; *congestion_info = ReshardingManager::get_child_congestion_info_not_finalized( &parent_trie, &parent_shard_layout, From d6e361718f8f78bed933f02507acdfe73cbedf51 Mon Sep 17 00:00:00 2001 From: wacban Date: Thu, 19 Dec 2024 10:56:30 +0100 Subject: [PATCH 17/26] remove old todo --- Justfile | 2 +- core/primitives-core/src/version.rs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Justfile b/Justfile index 91dfadfa9d2..f8614e41392 100644 --- a/Justfile +++ b/Justfile @@ -2,7 +2,7 @@ # them at earliest convenience :) # Also in addition to this, the `nextest-integration` test is currently disabled on macos platform_excludes := if os() == "macos" { - "--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests" + "" } else if os() == "windows" { "--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests" } else { diff --git a/core/primitives-core/src/version.rs b/core/primitives-core/src/version.rs index b58568a8952..35f78cc3e53 100644 --- a/core/primitives-core/src/version.rs +++ b/core/primitives-core/src/version.rs @@ -275,7 +275,6 @@ impl ProtocolFeature { // BandwidthScheduler must be enabled before ReshardingV3! When // releasing this feature please make sure to schedule separate // protocol upgrades for those features! - // TODO ask longarithm to update the forknet instructions ProtocolFeature::BandwidthScheduler => 145, ProtocolFeature::SimpleNightshadeV4 => 146, #[cfg(feature = "protocol_feature_relaxed_chunk_validation")] From c8c683a65e02483f1d470e0e96162b0d06c4a9ef Mon Sep 17 00:00:00 2001 From: wacban Date: Thu, 19 Dec 2024 11:39:28 +0100 Subject: [PATCH 18/26] remove allow_negative_refcount --- integration-tests/src/test_loop/tests/resharding_v3.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/integration-tests/src/test_loop/tests/resharding_v3.rs b/integration-tests/src/test_loop/tests/resharding_v3.rs index a0153dfaf0c..48b26cb46c8 100644 --- a/integration-tests/src/test_loop/tests/resharding_v3.rs +++ b/integration-tests/src/test_loop/tests/resharding_v3.rs @@ -968,7 +968,6 @@ fn test_resharding_v3_split_parent_buffered_receipts_base(base_shard_layout_vers let account_in_right_child: AccountId = "account6".parse().unwrap(); let params = TestReshardingParametersBuilder::default() .base_shard_layout_version(base_shard_layout_version) - .allow_negative_refcount(true) .deploy_test_contract(receiver_account.clone()) .limit_outgoing_gas(true) .add_loop_action(call_burn_gas_contract( From bb0f0be7d087a5ab6b2d7dd1620a35795d65f4d9 Mon Sep 17 00:00:00 2001 From: wacban Date: Mon, 23 Dec 2024 12:17:43 +0100 Subject: [PATCH 19/26] revert justfile --- Justfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Justfile b/Justfile index f8614e41392..91dfadfa9d2 100644 --- a/Justfile +++ b/Justfile @@ -2,7 +2,7 @@ # them at earliest convenience :) # Also in addition to this, the `nextest-integration` test is currently disabled on macos platform_excludes := if os() == "macos" { - "" + "--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests" } else if os() == "windows" { "--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests" } else { From d1da51158f09f9ceb7cf7488c735d840d9b88ad3 Mon Sep 17 00:00:00 2001 From: wacban Date: Fri, 3 Jan 2025 12:03:15 +0100 Subject: [PATCH 20/26] review comments --- chain/chain/src/resharding/manager.rs | 9 ++++++++- core/primitives/src/congestion_info.rs | 10 ++++++---- runtime/runtime/src/congestion_control.rs | 2 +- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index a6b92bc9d6d..089ffddf57b 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -338,7 +338,6 @@ impl ReshardingManager { let bytes = receipt_groups.total_size(); let gas = receipt_groups.total_gas(); - let gas = gas.try_into().expect("ReceiptGroup gas must fit in u64"); congestion_info .remove_buffered_receipt_gas(gas) @@ -366,6 +365,14 @@ impl ReshardingManager { .get_shard_index(own_shard)? .try_into() .expect("ShardIndex must fit in u64"); + // Please note that the congestion seed used during resharding is + // different than the one used during normal operation. In runtime the + // seed is set to the sum of shard index and block height. The block + // height isn't easily available on all call sites which is why the + // simplified seed is used. This is valid because it's deterministic and + // resharding is a very rare event. However in a perfect world it should + // be the same. + // TODO - Use proper congestion control seed during resharding. let congestion_seed = own_shard_index; congestion_info.finalize_allowed_shard(own_shard, &all_shards, congestion_seed); Ok(()) diff --git a/core/primitives/src/congestion_info.rs b/core/primitives/src/congestion_info.rs index f7e7c886357..b8433de7331 100644 --- a/core/primitives/src/congestion_info.rs +++ b/core/primitives/src/congestion_info.rs @@ -300,11 +300,11 @@ impl CongestionInfo { Ok(()) } - pub fn remove_buffered_receipt_gas(&mut self, gas: Gas) -> Result<(), RuntimeError> { + pub fn remove_buffered_receipt_gas(&mut self, gas: u128) -> Result<(), RuntimeError> { match self { CongestionInfo::V1(inner) => { inner.buffered_receipts_gas = - inner.buffered_receipts_gas.checked_sub(gas as u128).ok_or_else(|| { + inner.buffered_receipts_gas.checked_sub(gas).ok_or_else(|| { RuntimeError::UnexpectedIntegerOverflow( "remove_buffered_receipt_gas".into(), ) @@ -730,7 +730,8 @@ mod tests { assert_eq!(config.max_tx_gas, control.process_tx_limit()); // remove halve the congestion - info.remove_buffered_receipt_gas(config.max_congestion_outgoing_gas / 2).unwrap(); + let gas_diff = config.max_congestion_outgoing_gas / 2; + info.remove_buffered_receipt_gas(gas_diff.into()).unwrap(); let control = CongestionControl::new(config, info, 0); assert_eq!(0.5, control.congestion_level()); assert_eq!( @@ -741,7 +742,8 @@ mod tests { assert!(control.shard_accepts_transactions().is_no()); // reduce congestion to 1/8 - info.remove_buffered_receipt_gas(3 * config.max_congestion_outgoing_gas / 8).unwrap(); + let gas_diff = 3 * config.max_congestion_outgoing_gas / 8; + info.remove_buffered_receipt_gas(gas_diff.into()).unwrap(); let control = CongestionControl::new(config, info, 0); assert_eq!(0.125, control.congestion_level()); assert_eq!( diff --git a/runtime/runtime/src/congestion_control.rs b/runtime/runtime/src/congestion_control.rs index 2071305ad63..29cd84ef677 100644 --- a/runtime/runtime/src/congestion_control.rs +++ b/runtime/runtime/src/congestion_control.rs @@ -321,7 +321,7 @@ impl ReceiptSinkV2 { )? { ReceiptForwarding::Forwarded => { self.own_congestion_info.remove_receipt_bytes(size)?; - self.own_congestion_info.remove_buffered_receipt_gas(gas)?; + self.own_congestion_info.remove_buffered_receipt_gas(gas.into())?; if should_update_outgoing_metadatas { // Can't update metadatas immediately because state_update is borrowed by iterator. outgoing_metadatas_updates.push((ByteSize::b(size), gas)); From fa3fef0c758758a629bdee514fbb774b244605df Mon Sep 17 00:00:00 2001 From: wacban Date: Mon, 6 Jan 2025 13:43:57 +0100 Subject: [PATCH 21/26] debug --- Justfile | 2 +- chain/chain/src/resharding/manager.rs | 2 ++ chain/chain/src/runtime/mod.rs | 2 +- chain/chain/src/stateless_validation/chunk_validation.rs | 1 + chain/client/src/client.rs | 4 +++- core/o11y/src/testonly.rs | 2 +- core/primitives-core/src/hash.rs | 3 ++- integration-tests/src/test_loop/tests/resharding_v3.rs | 8 ++++++++ 8 files changed, 19 insertions(+), 5 deletions(-) diff --git a/Justfile b/Justfile index 91dfadfa9d2..f8614e41392 100644 --- a/Justfile +++ b/Justfile @@ -2,7 +2,7 @@ # them at earliest convenience :) # Also in addition to this, the `nextest-integration` test is currently disabled on macos platform_excludes := if os() == "macos" { - "--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests" + "" } else if os() == "windows" { "--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests" } else { diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index 089ffddf57b..2280f58b892 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -320,6 +320,7 @@ impl ReshardingManager { parent_congestion_info: CongestionInfo, retain_mode: RetainMode, ) -> Result { + tracing::debug!(target: "resharding", "Getting child congestion info."); // The left child contains all the delayed and buffered receipts from the // parent so it should have identical congestion info. if retain_mode == RetainMode::Left { @@ -351,6 +352,7 @@ impl ReshardingManager { // congestion info must match this invariant. assert_eq!(congestion_info.buffered_receipts_gas(), 0); + tracing::debug!(target: "resharding", "Getting child congestion info done."); Ok(congestion_info) } diff --git a/chain/chain/src/runtime/mod.rs b/chain/chain/src/runtime/mod.rs index 671acbdb3fa..5fe95bd1718 100644 --- a/chain/chain/src/runtime/mod.rs +++ b/chain/chain/src/runtime/mod.rs @@ -799,7 +799,7 @@ impl RuntimeAdapter for NightshadeRuntime { } } } - debug!(target: "runtime", "Transaction filtering results {} valid out of {} pulled from the pool", result.transactions.len(), num_checked_transactions); + debug!(target: "runtime", limited_by=?result.limited_by, "Transaction filtering results {} valid out of {} pulled from the pool", result.transactions.len(), num_checked_transactions); let shard_label = shard_id.to_string(); metrics::PREPARE_TX_SIZE.with_label_values(&[&shard_label]).observe(total_size as f64); metrics::PREPARE_TX_REJECTED diff --git a/chain/chain/src/stateless_validation/chunk_validation.rs b/chain/chain/src/stateless_validation/chunk_validation.rs index 1ad210b8292..7ec385f3bd1 100644 --- a/chain/chain/src/stateless_validation/chunk_validation.rs +++ b/chain/chain/src/stateless_validation/chunk_validation.rs @@ -97,6 +97,7 @@ pub fn validate_prepared_transactions( ) -> Result { let parent_block = chain.chain_store().get_block(chunk_header.prev_block_hash())?; let last_chunk_transactions_size = borsh::to_vec(last_chunk_transactions)?.len(); + tracing::info!("boom prepare_transactions from validator"); runtime_adapter.prepare_transactions( storage_config, crate::types::PrepareTransactionsChunkContext { diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index 00dc12711ac..0ad96830816 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -1072,6 +1072,7 @@ impl Client { } else { 0 }; + tracing::info!("boom prepare_transactions from client"); runtime.prepare_transactions( storage_config, PrepareTransactionsChunkContext { @@ -2255,7 +2256,8 @@ impl Client { validators.remove(account_id); } for validator in validators { - trace!(target: "client", me = ?signer.as_ref().map(|bp| bp.validator_id()), ?tx, ?validator, ?shard_id, "Routing a transaction"); + let tx_hash = tx.get_hash(); + trace!(target: "client", me = ?signer.as_ref().map(|bp| bp.validator_id()), ?tx_hash, ?validator, ?shard_id, "Routing a transaction"); // Send message to network to actually forward transaction. self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( diff --git a/core/o11y/src/testonly.rs b/core/o11y/src/testonly.rs index e8bffd8a5a8..bda6b69cf4d 100644 --- a/core/o11y/src/testonly.rs +++ b/core/o11y/src/testonly.rs @@ -23,7 +23,7 @@ fn setup_subscriber_from_filter(mut env_filter: EnvFilter) { let _ = fmt::Subscriber::builder() .with_ansi(use_color_auto()) - .with_span_events(fmt::format::FmtSpan::CLOSE) + .with_span_events(fmt::format::FmtSpan::NONE) .with_env_filter(env_filter) .with_writer(fmt::TestWriter::new()) .with_timer(TestUptime::default()) diff --git a/core/primitives-core/src/hash.rs b/core/primitives-core/src/hash.rs index 194facb1aa6..08730bdc717 100644 --- a/core/primitives-core/src/hash.rs +++ b/core/primitives-core/src/hash.rs @@ -209,7 +209,8 @@ impl fmt::Debug for CryptoHash { impl fmt::Display for CryptoHash { fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result { - self.to_base58_impl(|encoded| fmtr.write_str(encoded)) + // TODO remove me debugging only + self.to_base58_impl(|encoded| fmtr.write_str(&encoded[..4])) } } diff --git a/integration-tests/src/test_loop/tests/resharding_v3.rs b/integration-tests/src/test_loop/tests/resharding_v3.rs index a593ee9fbf5..59a4d6e1850 100644 --- a/integration-tests/src/test_loop/tests/resharding_v3.rs +++ b/integration-tests/src/test_loop/tests/resharding_v3.rs @@ -437,6 +437,14 @@ fn test_resharding_v3_base(params: TestReshardingParameters) { assert!(block_header.chunk_mask().iter().all(|chunk_bit| *chunk_bit)); } + let shard_layout = client.epoch_manager.get_shard_layout(&tip.epoch_id).unwrap(); + println!( + "new block #{} shards: {:?} chunk mask {:?}", + tip.height, + shard_layout.shard_ids().collect_vec(), + block_header.chunk_mask().to_vec() + ); + trie_sanity_check.assert_state_sanity(&clients, expected_num_shards); let epoch_height = From 130daf9a88708120dbd533cac9d7ae2a5dfe0a82 Mon Sep 17 00:00:00 2001 From: wacban Date: Tue, 7 Jan 2025 09:50:52 +0100 Subject: [PATCH 22/26] wip - recording --- chain/chain/src/resharding/manager.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index 2280f58b892..0db1438c638 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -216,10 +216,6 @@ impl ReshardingManager { let mem_trie_update = mem_tries.update(*parent_chunk_extra.state_root(), mode)?; let trie_changes = mem_trie_update.retain_split_shard(&boundary_account, retain_mode); - let partial_storage = trie_recorder.recorded_storage(); - let partial_state_len = match &partial_storage.nodes { - PartialState::TrieValues(values) => values.len(), - }; let mem_changes = trie_changes.mem_trie_changes.as_ref().unwrap(); let new_state_root = mem_tries.apply_memtrie_changes(block_height, mem_changes); drop(mem_tries); @@ -227,18 +223,25 @@ impl ReshardingManager { // Get the congestion info for the child. let parent_epoch_id = block.header().epoch_id(); let parent_shard_layout = self.epoch_manager.get_shard_layout(&parent_epoch_id)?; + let parent_state_root = *parent_chunk_extra.state_root(); + let parent_trie = tries.get_trie_for_shard(parent_shard_uid, parent_state_root); + let child_epoch_id = self.epoch_manager.get_next_epoch_id(block.hash())?; let child_shard_layout = self.epoch_manager.get_shard_layout(&child_epoch_id)?; let child_congestion_info = Self::get_child_congestion_info( - &tries, + &parent_trie, &parent_shard_layout, - parent_shard_uid, &parent_chunk_extra, &child_shard_layout, new_shard_uid, retain_mode, )?; + let partial_storage = trie_recorder.recorded_storage(); + let partial_state_len = match &partial_storage.nodes { + PartialState::TrieValues(values) => values.len(), + }; + // TODO(resharding): set all fields of `ChunkExtra`. Consider stronger // typing. Clarify where it should happen when `State` and // `FlatState` update is implemented. @@ -279,9 +282,8 @@ impl ReshardingManager { } fn get_child_congestion_info( - tries: &ShardTries, + parent_trie: &dyn TrieAccess, parent_shard_layout: &ShardLayout, - parent_shard_uid: ShardUId, parent_chunk_extra: &Arc, child_shard_layout: &ShardLayout, child_shard_uid: ShardUId, @@ -291,10 +293,8 @@ impl ReshardingManager { parent_chunk_extra.congestion_info().expect("The congestion info must exist!"); // Get the congestion info based on the parent shard. - let parent_state_root = *parent_chunk_extra.state_root(); - let parent_trie = tries.get_trie_for_shard(parent_shard_uid, parent_state_root); let mut child_congestion_info = Self::get_child_congestion_info_not_finalized( - &parent_trie, + parent_trie, &parent_shard_layout, parent_congestion_info, retain_mode, From 4424fae5c9e7c7a3e63fe5dc8952075cb15b47b2 Mon Sep 17 00:00:00 2001 From: wacban Date: Tue, 7 Jan 2025 11:44:00 +0100 Subject: [PATCH 23/26] record proof --- chain/chain/src/resharding/manager.rs | 7 +++- chain/chain/src/runtime/mod.rs | 4 +-- core/store/src/trie/mod.rs | 29 ++++++++++++---- core/store/src/trie/state_parts.rs | 6 ++-- core/store/src/trie/trie_recording.rs | 6 ++-- core/store/src/trie/trie_tests.rs | 14 ++++---- runtime/runtime/src/tests/apply.rs | 48 +++++++++++++-------------- 7 files changed, 69 insertions(+), 45 deletions(-) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index 0db1438c638..0a6d1efbbcd 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -1,3 +1,4 @@ +use std::cell::RefCell; use std::io; use std::sync::Arc; @@ -226,6 +227,9 @@ impl ReshardingManager { let parent_state_root = *parent_chunk_extra.state_root(); let parent_trie = tries.get_trie_for_shard(parent_shard_uid, parent_state_root); + let trie_recorder = RefCell::new(trie_recorder); + let parent_trie = parent_trie.recording_reads_with_recorder(trie_recorder); + let child_epoch_id = self.epoch_manager.get_next_epoch_id(block.hash())?; let child_shard_layout = self.epoch_manager.get_shard_layout(&child_epoch_id)?; let child_congestion_info = Self::get_child_congestion_info( @@ -237,7 +241,8 @@ impl ReshardingManager { retain_mode, )?; - let partial_storage = trie_recorder.recorded_storage(); + let trie_recorder = parent_trie.take_recorder().unwrap(); + let partial_storage = trie_recorder.borrow_mut().recorded_storage(); let partial_state_len = match &partial_storage.nodes { PartialState::TrieValues(values) => values.len(), }; diff --git a/chain/chain/src/runtime/mod.rs b/chain/chain/src/runtime/mod.rs index 5fe95bd1718..29077da4cd1 100644 --- a/chain/chain/src/runtime/mod.rs +++ b/chain/chain/src/runtime/mod.rs @@ -650,7 +650,7 @@ impl RuntimeAdapter for NightshadeRuntime { if ProtocolFeature::StatelessValidation.enabled(next_protocol_version) || cfg!(feature = "shadow_chunk_validation") { - trie = trie.recording_reads(); + trie = trie.recording_reads_new_recorder(); } let mut state_update = TrieUpdate::new(trie); @@ -882,7 +882,7 @@ impl RuntimeAdapter for NightshadeRuntime { if ProtocolFeature::StatelessValidation.enabled(next_protocol_version) || cfg!(feature = "shadow_chunk_validation") { - trie = trie.recording_reads(); + trie = trie.recording_reads_new_recorder(); } match self.process_state_update( diff --git a/core/store/src/trie/mod.rs b/core/store/src/trie/mod.rs index d998f5129b0..93c521d1b8d 100644 --- a/core/store/src/trie/mod.rs +++ b/core/store/src/trie/mod.rs @@ -717,18 +717,28 @@ impl Trie { /// Makes a new trie that has everything the same except that access /// through that trie accumulates a state proof for all nodes accessed. - pub fn recording_reads(&self) -> Self { + pub fn recording_reads_new_recorder(&self) -> Self { + self.recording_reads_with_recorder(RefCell::new(TrieRecorder::new())) + } + + /// Makes a new trie that has everything the same except that access + /// through that trie accumulates a state proof for all nodes accessed. + pub fn recording_reads_with_recorder(&self, recorder: RefCell) -> Self { let mut trie = Self::new_with_memtries( self.storage.clone(), self.memtries.clone(), self.root, self.flat_storage_chunk_view.clone(), ); - trie.recorder = Some(RefCell::new(TrieRecorder::new())); + trie.recorder = Some(recorder); trie.charge_gas_for_trie_node_access = self.charge_gas_for_trie_node_access; trie } + pub fn take_recorder(self) -> Option> { + self.recorder + } + /// Takes the recorded state proof out of the trie. pub fn recorded_storage(&self) -> Option { self.recorder.as_ref().map(|recorder| recorder.borrow_mut().recorded_storage()) @@ -2224,7 +2234,8 @@ mod tests { ]; let root = test_populate_trie(&tries, &empty_root, ShardUId::single_shard(), changes); - let trie2 = tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(); + let trie2 = + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(); trie2.get(b"dog").unwrap(); trie2.get(b"horse").unwrap(); let partial_storage = trie2.recorded_storage(); @@ -2253,14 +2264,18 @@ mod tests { let root = test_populate_trie(&tries, &empty_root, ShardUId::single_shard(), changes); // Trie: extension -> branch -> 2 leaves { - let trie2 = tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(); + let trie2 = tries + .get_trie_for_shard(ShardUId::single_shard(), root) + .recording_reads_new_recorder(); trie2.get(b"doge").unwrap(); // record extension, branch and one leaf with value, but not the other assert_eq!(trie2.recorded_storage().unwrap().nodes.len(), 4); } { - let trie2 = tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(); + let trie2 = tries + .get_trie_for_shard(ShardUId::single_shard(), root) + .recording_reads_new_recorder(); let updates = vec![(b"doge".to_vec(), None)]; trie2.update(updates).unwrap(); // record extension, branch and both leaves, but not the value. @@ -2268,7 +2283,9 @@ mod tests { } { - let trie2 = tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(); + let trie2 = tries + .get_trie_for_shard(ShardUId::single_shard(), root) + .recording_reads_new_recorder(); let updates = vec![(b"dodo".to_vec(), Some(b"asdf".to_vec()))]; trie2.update(updates).unwrap(); // record extension and branch, but not leaves diff --git a/core/store/src/trie/state_parts.rs b/core/store/src/trie/state_parts.rs index 1e578947f03..5044a432ff1 100644 --- a/core/store/src/trie/state_parts.rs +++ b/core/store/src/trie/state_parts.rs @@ -81,7 +81,7 @@ impl Trie { &self, part_id: PartId, ) -> Result { - let with_recording = self.recording_reads(); + let with_recording = self.recording_reads_new_recorder(); with_recording.visit_nodes_for_state_part(part_id)?; let recorded = with_recording.recorded_storage().unwrap(); Ok(recorded.nodes) @@ -147,7 +147,7 @@ impl Trie { let PartId { idx, total } = part_id; // 1. Extract nodes corresponding to state part boundaries. - let recording_trie = self.recording_reads(); + let recording_trie = self.recording_reads_new_recorder(); let boundaries_read_timer = metrics::GET_STATE_PART_BOUNDARIES_ELAPSED .with_label_values(&[&shard_id.to_string()]) .start_timer(); @@ -826,7 +826,7 @@ mod tests { for part_id in 0..num_parts { // Compute proof with size and check that it doesn't exceed theoretical boundary for // the path with full set of left siblings of maximal possible size. - let trie_recording = trie.recording_reads(); + let trie_recording = trie.recording_reads_new_recorder(); let left_nibbles_boundary = trie_recording.find_state_part_boundary(part_id, num_parts).unwrap(); let left_key_boundary = NibbleSlice::nibbles_to_bytes(&left_nibbles_boundary); diff --git a/core/store/src/trie/trie_recording.rs b/core/store/src/trie/trie_recording.rs index c38180cf715..39caa908878 100644 --- a/core/store/src/trie/trie_recording.rs +++ b/core/store/src/trie/trie_recording.rs @@ -550,7 +550,7 @@ mod trie_recording_tests { // Now let's do this again while recording, and make sure that the counters // we get are exactly the same. let trie = get_trie_for_shard(&tries, shard_uid, state_root, use_flat_storage) - .recording_reads(); + .recording_reads_new_recorder(); trie.accounting_cache.borrow().enable_switch().set(enable_accounting_cache); for key in &keys_to_get { assert_eq!(trie.get(key).unwrap(), data_in_trie.get(key).cloned()); @@ -575,7 +575,7 @@ mod trie_recording_tests { // in-memory tries. destructively_delete_in_memory_state_from_disk(&store.trie_store(), &data_in_trie); let trie = get_trie_for_shard(&tries, shard_uid, state_root, use_flat_storage) - .recording_reads(); + .recording_reads_new_recorder(); trie.accounting_cache.borrow().enable_switch().set(enable_accounting_cache); for key in &keys_to_get { assert_eq!(trie.get(key).unwrap(), data_in_trie.get(key).cloned()); @@ -619,7 +619,7 @@ mod trie_recording_tests { // Build a Trie using recorded storage and enable recording_reads on this Trie let trie = Trie::from_recorded_storage(partial_storage, state_root, use_flat_storage) - .recording_reads(); + .recording_reads_new_recorder(); trie.accounting_cache.borrow().enable_switch().set(enable_accounting_cache); for key in &keys_to_get { assert_eq!(trie.get(key).unwrap(), data_in_trie.get(key).cloned()); diff --git a/core/store/src/trie/trie_tests.rs b/core/store/src/trie/trie_tests.rs index 1cae1d30e96..16fffff3a4c 100644 --- a/core/store/src/trie/trie_tests.rs +++ b/core/store/src/trie/trie_tests.rs @@ -64,7 +64,7 @@ where F: FnMut(Trie) -> Result<(Trie, Out), StorageError>, Out: PartialEq + Debug, { - let recording_trie = trie.recording_reads(); + let recording_trie = trie.recording_reads_new_recorder(); let (recording_trie, output) = test(recording_trie).expect("should not fail"); (recording_trie.recorded_storage().unwrap(), recording_trie, output) } @@ -432,7 +432,7 @@ mod trie_storage_tests { let state_root = test_populate_trie(&tries, &Trie::EMPTY_ROOT, shard_uid, base_changes.clone()); - let trie = tries.get_trie_for_shard(shard_uid, state_root).recording_reads(); + let trie = tries.get_trie_for_shard(shard_uid, state_root).recording_reads_new_recorder(); let changes = trie.update(updates.clone()).unwrap(); tracing::info!("Changes: {:?}", changes); @@ -443,7 +443,7 @@ mod trie_storage_tests { let shard_uid = ShardUId::single_shard(); let state_root = test_populate_trie(&tries, &Trie::EMPTY_ROOT, shard_uid, base_changes); - let trie = tries.get_trie_for_shard(shard_uid, state_root).recording_reads(); + let trie = tries.get_trie_for_shard(shard_uid, state_root).recording_reads_new_recorder(); let changes = trie.update(updates).unwrap(); tracing::info!("Changes: {:?}", changes); @@ -501,7 +501,8 @@ mod trie_storage_tests { vec![(vec![7], vec![1]), (vec![7, 0], vec![2]), (vec![7, 1], vec![3])]; let disk_iter_recorded = { - let trie = tries.get_trie_for_shard(shard_uid, state_root).recording_reads(); + let trie = + tries.get_trie_for_shard(shard_uid, state_root).recording_reads_new_recorder(); let mut disk_iter = trie.disk_iter().unwrap(); disk_iter.seek_prefix(&iter_prefix).unwrap(); let disk_iter_results = disk_iter.collect::, _>>().unwrap(); @@ -510,7 +511,8 @@ mod trie_storage_tests { }; let memtrie_iter_recorded = { - let trie = tries.get_trie_for_shard(shard_uid, state_root).recording_reads(); + let trie = + tries.get_trie_for_shard(shard_uid, state_root).recording_reads_new_recorder(); let lock = trie.lock_for_iter(); let mut memtrie_iter = lock.iter().unwrap(); match memtrie_iter { @@ -529,7 +531,7 @@ mod trie_storage_tests { let partial_recorded = { let trie = Trie::from_recorded_storage(memtrie_iter_recorded, state_root, true) - .recording_reads(); + .recording_reads_new_recorder(); let mut disk_iter = trie.disk_iter().unwrap(); disk_iter.seek_prefix(&iter_prefix).unwrap(); let disk_iter_results = disk_iter.collect::, _>>().unwrap(); diff --git a/runtime/runtime/src/tests/apply.rs b/runtime/runtime/src/tests/apply.rs index f48a8fa7db6..80cc41d1984 100644 --- a/runtime/runtime/src/tests/apply.rs +++ b/runtime/runtime/src/tests/apply.rs @@ -1153,7 +1153,7 @@ fn test_main_storage_proof_size_soft_limit() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[ @@ -1195,7 +1195,7 @@ fn test_main_storage_proof_size_soft_limit() { // The function call to bob_account should hit the main_storage_proof_size_soft_limit let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[ @@ -1258,7 +1258,7 @@ fn test_exclude_contract_code_from_witness() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[ @@ -1300,7 +1300,7 @@ fn test_exclude_contract_code_from_witness() { // The function call to bob_account should hit the main_storage_proof_size_soft_limit let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[ @@ -1372,7 +1372,7 @@ fn test_exclude_contract_code_from_witness_with_failed_call() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[ @@ -1413,7 +1413,7 @@ fn test_exclude_contract_code_from_witness_with_failed_call() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[ @@ -1506,7 +1506,7 @@ fn test_deploy_and_call_different_contracts() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[first_deploy_receipt, second_deploy_receipt], @@ -1533,7 +1533,7 @@ fn test_deploy_and_call_different_contracts() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[first_call_receipt, second_call_receipt], @@ -1612,7 +1612,7 @@ fn test_deploy_and_call_different_contracts_with_failed_call() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[first_deploy_receipt, second_deploy_receipt], @@ -1639,7 +1639,7 @@ fn test_deploy_and_call_different_contracts_with_failed_call() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[first_call_receipt, second_call_receipt], @@ -1716,7 +1716,7 @@ fn test_deploy_and_call_in_apply() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[first_deploy_receipt, second_deploy_receipt, first_call_receipt, second_call_receipt], @@ -1795,7 +1795,7 @@ fn test_deploy_and_call_in_apply_with_failed_call() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[first_deploy_receipt, second_deploy_receipt, first_call_receipt, second_call_receipt], @@ -1850,7 +1850,7 @@ fn test_deploy_existing_contract_to_different_account() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[first_deploy_receipt, first_call_receipt], @@ -1892,7 +1892,7 @@ fn test_deploy_existing_contract_to_different_account() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[second_deploy_receipt, second_call_receipt], @@ -1941,7 +1941,7 @@ fn test_deploy_and_call_in_same_receipt() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[receipt], @@ -1990,7 +1990,7 @@ fn test_deploy_and_call_in_same_receipt_with_failed_call() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[receipt], @@ -2026,7 +2026,7 @@ fn test_call_account_without_contract() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[receipt], @@ -2070,7 +2070,7 @@ fn test_contract_accesses_when_validating_chunk() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[deploy_receipt], @@ -2096,7 +2096,7 @@ fn test_contract_accesses_when_validating_chunk() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[call_receipt.clone()], @@ -2117,7 +2117,7 @@ fn test_contract_accesses_when_validating_chunk() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[call_receipt], @@ -2164,7 +2164,7 @@ fn test_exclude_existing_contract_code_for_deploy_action() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[deploy_receipt1], @@ -2188,7 +2188,7 @@ fn test_exclude_existing_contract_code_for_deploy_action() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[deploy_receipt2], @@ -2265,7 +2265,7 @@ fn test_exclude_existing_contract_code_for_delete_account_action() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[create_account_receipt, deploy_receipt], @@ -2289,7 +2289,7 @@ fn test_exclude_existing_contract_code_for_delete_account_action() { let apply_result = runtime .apply( - tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads(), + tries.get_trie_for_shard(ShardUId::single_shard(), root).recording_reads_new_recorder(), &None, &apply_state, &[delete_account_receipt], From 2796ac014cb3b1bb2612007590266741ed0780ff Mon Sep 17 00:00:00 2001 From: wacban Date: Tue, 7 Jan 2025 11:44:10 +0100 Subject: [PATCH 24/26] undo debug logs --- Justfile | 2 +- core/o11y/src/testonly.rs | 2 +- core/primitives-core/src/hash.rs | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Justfile b/Justfile index f8614e41392..91dfadfa9d2 100644 --- a/Justfile +++ b/Justfile @@ -2,7 +2,7 @@ # them at earliest convenience :) # Also in addition to this, the `nextest-integration` test is currently disabled on macos platform_excludes := if os() == "macos" { - "" + "--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests" } else if os() == "windows" { "--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests" } else { diff --git a/core/o11y/src/testonly.rs b/core/o11y/src/testonly.rs index bda6b69cf4d..e8bffd8a5a8 100644 --- a/core/o11y/src/testonly.rs +++ b/core/o11y/src/testonly.rs @@ -23,7 +23,7 @@ fn setup_subscriber_from_filter(mut env_filter: EnvFilter) { let _ = fmt::Subscriber::builder() .with_ansi(use_color_auto()) - .with_span_events(fmt::format::FmtSpan::NONE) + .with_span_events(fmt::format::FmtSpan::CLOSE) .with_env_filter(env_filter) .with_writer(fmt::TestWriter::new()) .with_timer(TestUptime::default()) diff --git a/core/primitives-core/src/hash.rs b/core/primitives-core/src/hash.rs index 08730bdc717..194facb1aa6 100644 --- a/core/primitives-core/src/hash.rs +++ b/core/primitives-core/src/hash.rs @@ -209,8 +209,7 @@ impl fmt::Debug for CryptoHash { impl fmt::Display for CryptoHash { fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result { - // TODO remove me debugging only - self.to_base58_impl(|encoded| fmtr.write_str(&encoded[..4])) + self.to_base58_impl(|encoded| fmtr.write_str(encoded)) } } From e9517cfe884f3ec376f4c8ab795ce8bc3e5ab9ab Mon Sep 17 00:00:00 2001 From: wacban Date: Tue, 7 Jan 2025 13:28:15 +0100 Subject: [PATCH 25/26] nits --- chain/chain/src/resharding/manager.rs | 21 ++++---- .../stateless_validation/chunk_validation.rs | 48 +++++++++---------- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index 0a6d1efbbcd..7af8c4c3153 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -226,6 +226,8 @@ impl ReshardingManager { let parent_shard_layout = self.epoch_manager.get_shard_layout(&parent_epoch_id)?; let parent_state_root = *parent_chunk_extra.state_root(); let parent_trie = tries.get_trie_for_shard(parent_shard_uid, parent_state_root); + let parent_congestion_info = + parent_chunk_extra.congestion_info().expect("The congestion info must exist!"); let trie_recorder = RefCell::new(trie_recorder); let parent_trie = parent_trie.recording_reads_with_recorder(trie_recorder); @@ -235,7 +237,7 @@ impl ReshardingManager { let child_congestion_info = Self::get_child_congestion_info( &parent_trie, &parent_shard_layout, - &parent_chunk_extra, + parent_congestion_info, &child_shard_layout, new_shard_uid, retain_mode, @@ -286,17 +288,14 @@ impl ReshardingManager { Ok(()) } - fn get_child_congestion_info( + pub fn get_child_congestion_info( parent_trie: &dyn TrieAccess, parent_shard_layout: &ShardLayout, - parent_chunk_extra: &Arc, + parent_congestion_info: CongestionInfo, child_shard_layout: &ShardLayout, child_shard_uid: ShardUId, retain_mode: RetainMode, ) -> Result { - let parent_congestion_info = - parent_chunk_extra.congestion_info().expect("The congestion info must exist!"); - // Get the congestion info based on the parent shard. let mut child_congestion_info = Self::get_child_congestion_info_not_finalized( parent_trie, @@ -319,23 +318,22 @@ impl ReshardingManager { // inferred efficiently from the combination of the parent shard's // congestion info and the receipt group metadata, that is available in the // parent shard's trie. - pub fn get_child_congestion_info_not_finalized( + fn get_child_congestion_info_not_finalized( parent_trie: &dyn TrieAccess, parent_shard_layout: &ShardLayout, parent_congestion_info: CongestionInfo, retain_mode: RetainMode, ) -> Result { - tracing::debug!(target: "resharding", "Getting child congestion info."); // The left child contains all the delayed and buffered receipts from the // parent so it should have identical congestion info. if retain_mode == RetainMode::Left { - return Ok(parent_congestion_info); + return Ok(parent_congestion_info.clone()); } // The right child contains all the delayed receipts from the parent but it // has no buffered receipts. It's info needs to be computed by subtracting // the parent's buffered receipts from the parent's congestion info. - let mut congestion_info = parent_congestion_info; + let mut congestion_info = parent_congestion_info.clone(); for shard_id in parent_shard_layout.shard_ids() { let receipt_groups = ReceiptGroupsQueue::load(parent_trie, shard_id)?; let Some(receipt_groups) = receipt_groups else { @@ -357,11 +355,10 @@ impl ReshardingManager { // congestion info must match this invariant. assert_eq!(congestion_info.buffered_receipts_gas(), 0); - tracing::debug!(target: "resharding", "Getting child congestion info done."); Ok(congestion_info) } - pub fn finalize_allowed_shard( + fn finalize_allowed_shard( child_shard_layout: &ShardLayout, child_shard_uid: ShardUId, congestion_info: &mut CongestionInfo, diff --git a/chain/chain/src/stateless_validation/chunk_validation.rs b/chain/chain/src/stateless_validation/chunk_validation.rs index 7ec385f3bd1..5998e165c29 100644 --- a/chain/chain/src/stateless_validation/chunk_validation.rs +++ b/chain/chain/src/stateless_validation/chunk_validation.rs @@ -692,7 +692,7 @@ pub fn validate_chunk_state_witness( .into_iter() .zip(state_witness.implicit_transitions.into_iter()) { - let (shard_uid, new_state_root) = match implicit_transition_params { + let (shard_uid, new_state_root, new_congestion_info) = match implicit_transition_params { ImplicitTransitionParams::ApplyOldChunk(block, shard_uid) => { let shard_context = ShardContext { shard_uid, should_apply_chunk: false }; let old_chunk_data = OldChunkData { @@ -712,7 +712,9 @@ pub fn validate_chunk_state_witness( shard_context, runtime_adapter, )?; - (shard_uid, apply_result.new_root) + let congestion_info = + chunk_extra.congestion_info().expect("The congestion info must exist!"); + (shard_uid, apply_result.new_root, congestion_info) } ImplicitTransitionParams::Resharding( boundary_account, @@ -725,36 +727,32 @@ pub fn validate_chunk_state_witness( // Update the congestion info based on the parent shard. It's // important to do this step before the `retain_split_shard` - // because only the parent has the needed information. - if let Some(congestion_info) = chunk_extra.congestion_info_mut() { - // Get the congestion info based on the parent shard. - let epoch_id = epoch_manager.get_epoch_id(&block_hash)?; - let parent_shard_layout = epoch_manager.get_shard_layout(&epoch_id)?; - let parent_congestion_info = *congestion_info; - *congestion_info = ReshardingManager::get_child_congestion_info_not_finalized( - &parent_trie, - &parent_shard_layout, - parent_congestion_info, - retain_mode, - )?; - - // Set the allowed shard based on the child shard. - let next_epoch_id = epoch_manager.get_next_epoch_id(&block_hash)?; - let next_shard_layout = epoch_manager.get_shard_layout(&next_epoch_id)?; - ReshardingManager::finalize_allowed_shard( - &next_shard_layout, - child_shard_uid, - congestion_info, - )?; - } + // because only the parent trie has the needed information. + let epoch_id = epoch_manager.get_epoch_id(&block_hash)?; + let parent_shard_layout = epoch_manager.get_shard_layout(&epoch_id)?; + let parent_congestion_info = + chunk_extra.congestion_info().expect("The congestion info must exist"); + + let child_epoch_id = epoch_manager.get_next_epoch_id(&block_hash)?; + let child_shard_layout = epoch_manager.get_shard_layout(&child_epoch_id)?; + let child_congestion_info = ReshardingManager::get_child_congestion_info( + &parent_trie, + &parent_shard_layout, + parent_congestion_info, + &child_shard_layout, + child_shard_uid, + retain_mode, + )?; let new_root = parent_trie.retain_split_shard(&boundary_account, retain_mode)?; - (child_shard_uid, new_root) + (child_shard_uid, new_root, child_congestion_info) } }; *chunk_extra.state_root_mut() = new_state_root; + *chunk_extra.congestion_info_mut().expect("The congestion info must exist!") = + new_congestion_info; if chunk_extra.state_root() != &transition.post_state_root { // This is an early check, it's not for correctness, only for better // error reporting in case of an invalid state witness due to a bug. From be19d21d637df1364d8d59ca56e7c7b07ffade40 Mon Sep 17 00:00:00 2001 From: wacban Date: Tue, 7 Jan 2025 14:52:01 +0100 Subject: [PATCH 26/26] clippy --- chain/chain/src/resharding/manager.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index 7af8c4c3153..3548943d42d 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -327,13 +327,13 @@ impl ReshardingManager { // The left child contains all the delayed and buffered receipts from the // parent so it should have identical congestion info. if retain_mode == RetainMode::Left { - return Ok(parent_congestion_info.clone()); + return Ok(parent_congestion_info); } // The right child contains all the delayed receipts from the parent but it // has no buffered receipts. It's info needs to be computed by subtracting // the parent's buffered receipts from the parent's congestion info. - let mut congestion_info = parent_congestion_info.clone(); + let mut congestion_info = parent_congestion_info; for shard_id in parent_shard_layout.shard_ids() { let receipt_groups = ReceiptGroupsQueue::load(parent_trie, shard_id)?; let Some(receipt_groups) = receipt_groups else {