From f06f7988216a4f0081f45136f38cacb44da0ee3c Mon Sep 17 00:00:00 2001 From: Marcelo Diop-Gonzalez Date: Fri, 20 Dec 2024 05:15:17 -0500 Subject: [PATCH 01/15] fix(snapshots): store BlockInfo in Resharding(CatchingUp) status (#12651) https://github.com/near/nearcore/pull/12589 made a change required for making snapshots of child shards that are catching up after resharding, where if we want to take a state snapshot of a child shard's flat storage whose status is `Resharding(CatchingUp)`, then we set it to `Ready` in the state snapshot. To do this, we need to be able to figure out what the correct `BlockInfo` is just from the hash, so we read the info from the block headers column. But this column is not kept in state snapshots (which was previously overlooked in test loop since everything was copied). So fix it by storing a `BlockInfo` in the `Resharding(CatchingUp)` status so we don't need to look these up anymore --- chain/chain/src/flat_storage_resharder.rs | 79 ++++++++++--------- chain/chain/src/resharding/event_type.rs | 17 ++-- chain/chain/src/resharding/manager.rs | 8 +- .../stateless_validation/chunk_validation.rs | 22 ++++-- core/store/src/db.rs | 2 +- core/store/src/db/testdb.rs | 7 +- core/store/src/flat/manager.rs | 30 +------ core/store/src/flat/types.rs | 15 +++- core/store/src/opener.rs | 2 +- .../res/protocol_schema.toml | 6 +- 10 files changed, 99 insertions(+), 89 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 83bd481f3fa..47e78ca8ebc 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -175,7 +175,7 @@ impl FlatStorageResharder { parent_shard, left_child_shard, right_child_shard, - resharding_hash, + resharding_block, .. } = split_params; info!(target: "resharding", ?split_params, "initiating flat storage shard split"); @@ -192,7 +192,7 @@ impl FlatStorageResharder { left_child_shard, right_child_shard, shard_layout: shard_layout.clone(), - resharding_hash, + resharding_block, flat_head, }; store_update.set_flat_storage_status( @@ -390,11 +390,11 @@ impl FlatStorageResharder { let mut iter = match self.flat_storage_iterator( &flat_store, &parent_shard, - &split_params.resharding_hash, + &split_params.resharding_block.hash, ) { Ok(iter) => iter, Err(err) => { - error!(target: "resharding", ?parent_shard, block_hash=?split_params.resharding_hash, ?err, "failed to build flat storage iterator"); + error!(target: "resharding", ?parent_shard, block_hash=?split_params.resharding_block.hash, ?err, "failed to build flat storage iterator"); return FlatStorageReshardingTaskResult::Failed; } }; @@ -482,7 +482,7 @@ impl FlatStorageResharder { left_child_shard, right_child_shard, flat_head, - resharding_hash, + resharding_block, .. 
} = split_params; let flat_store = self.runtime.store().flat_store(); @@ -507,7 +507,7 @@ impl FlatStorageResharder { store_update.set_flat_storage_status( child_shard, FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp( - resharding_hash, + resharding_block, )), ); // Catchup will happen in a separate task, so send a request to schedule the @@ -670,9 +670,8 @@ impl FlatStorageResharder { .flat_store() .get_flat_storage_status(shard_uid) .map_err(|e| Into::::into(e))?; - let FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp( - mut flat_head_block_hash, - )) = status + let FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(mut flat_head)) = + status else { return Err(Error::Other(format!( "unexpected resharding catchup flat storage status for {}: {:?}", @@ -686,16 +685,16 @@ impl FlatStorageResharder { target: "resharding", "shard_catchup_apply_deltas/batch", ?shard_uid, - ?flat_head_block_hash, + ?flat_head, batch_id = ?num_batches_done) .entered(); let chain_final_head = chain_store.final_head()?; // If we reached the desired new flat head, we can terminate the delta application step. - if is_flat_head_on_par_with_chain(&flat_head_block_hash, &chain_final_head) { + if is_flat_head_on_par_with_chain(&flat_head.hash, &chain_final_head) { return Ok(Some(( num_batches_done, - Tip::from_header(&chain_store.get_block_header(&flat_head_block_hash)?), + Tip::from_header(&chain_store.get_block_header(&flat_head.hash)?), ))); } @@ -706,26 +705,32 @@ impl FlatStorageResharder { // Merge deltas from the next blocks until we reach chain final head. for _ in 0..catch_up_blocks { - let height = chain_store.get_block_height(&flat_head_block_hash)?; debug_assert!( - height <= chain_final_head.height, - "flat head: {flat_head_block_hash}" + flat_head.height <= chain_final_head.height, + "flat head: {:?}", + &flat_head, ); // Stop if we reached the desired new flat head. - if is_flat_head_on_par_with_chain(&flat_head_block_hash, &chain_final_head) { + if is_flat_head_on_par_with_chain(&flat_head.hash, &chain_final_head) { break; } - if self.coordinate_snapshot(height) { + if self.coordinate_snapshot(flat_head.height) { postpone = true; break; } - flat_head_block_hash = chain_store.get_next_block_hash(&flat_head_block_hash)?; + let next_hash = chain_store.get_next_block_hash(&flat_head.hash)?; + let next_header = chain_store.get_block_header(&next_hash)?; + flat_head = BlockInfo { + hash: *next_header.hash(), + height: next_header.height(), + prev_hash: *next_header.prev_hash(), + }; if let Some(changes) = store - .get_delta(shard_uid, flat_head_block_hash) + .get_delta(shard_uid, flat_head.hash) .map_err(|err| Into::::into(err))? 
{ merged_changes.merge(changes); - store_update.remove_delta(shard_uid, flat_head_block_hash); + store_update.remove_delta(shard_uid, flat_head.hash); } // TODO(resharding): if flat_head_block_hash == state sync hash -> do snapshot } @@ -734,14 +739,12 @@ impl FlatStorageResharder { merged_changes.apply_to_flat_state(&mut store_update, shard_uid); store_update.set_flat_storage_status( shard_uid, - FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp( - flat_head_block_hash, - )), + FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(flat_head)), ); store_update.commit()?; num_batches_done += 1; - metrics.set_head_height(chain_store.get_block_height(&flat_head_block_hash)?); + metrics.set_head_height(flat_head.height); if postpone { return Ok(None); @@ -1099,7 +1102,7 @@ impl FlatStorageReshardingEventStatus { fn resharding_hash(&self) -> CryptoHash { match self { FlatStorageReshardingEventStatus::SplitShard(_, split_status, ..) => { - split_status.resharding_hash + split_status.resharding_block.hash } } } @@ -1384,12 +1387,9 @@ mod tests { chain: &Chain, new_shard_layout: &ShardLayout, ) -> ReshardingEventType { - ReshardingEventType::from_shard_layout( - &new_shard_layout, - chain.head().unwrap().last_block_hash, - ) - .unwrap() - .unwrap() + ReshardingEventType::from_shard_layout(&new_shard_layout, chain.head().unwrap().into()) + .unwrap() + .unwrap() } enum PreviousBlockHeight { @@ -1524,7 +1524,11 @@ mod tests { left_child_shard, right_child_shard, shard_layout: new_shard_layout, - resharding_hash: CryptoHash::default(), + resharding_block: BlockInfo { + hash: CryptoHash::default(), + height: 2, + prev_hash: CryptoHash::default(), + }, flat_head: BlockInfo { hash: CryptoHash::default(), height: 1, @@ -1597,7 +1601,7 @@ mod tests { assert_eq!( flat_store.get_flat_storage_status(child), Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp( - chain.final_head().unwrap().last_block_hash + chain.final_head().unwrap().into() ))) ); } @@ -2219,7 +2223,7 @@ mod tests { parent_shard, left_child_shard, right_child_shard, - resharding_hash, + resharding_block, .. } = match resharding_event_type.clone() { ReshardingEventType::SplitShard(params) => params, @@ -2278,12 +2282,15 @@ mod tests { resharder.resharding_config, ); assert!(resharder - .resume(left_child_shard, &FlatStorageReshardingStatus::CatchingUp(resharding_hash)) + .resume( + left_child_shard, + &FlatStorageReshardingStatus::CatchingUp(resharding_block) + ) .is_ok()); assert!(resharder .resume( right_child_shard, - &FlatStorageReshardingStatus::CatchingUp(resharding_hash) + &FlatStorageReshardingStatus::CatchingUp(resharding_block) ) .is_ok()); } diff --git a/chain/chain/src/resharding/event_type.rs b/chain/chain/src/resharding/event_type.rs index b62f7970ee5..4b7bcb1da9c 100644 --- a/chain/chain/src/resharding/event_type.rs +++ b/chain/chain/src/resharding/event_type.rs @@ -1,9 +1,9 @@ //! Collection of all resharding V3 event types. use near_chain_primitives::Error; -use near_primitives::hash::CryptoHash; use near_primitives::shard_layout::ShardLayout; use near_primitives::types::AccountId; +use near_store::flat::BlockInfo; use near_store::ShardUId; use tracing::error; @@ -27,7 +27,7 @@ pub struct ReshardingSplitShardParams { /// The account at the boundary between the two children. pub boundary_account: AccountId, /// Hash of the last block having the old shard layout. 
- pub resharding_hash: CryptoHash, + pub resharding_block: BlockInfo, } impl ReshardingSplitShardParams { @@ -48,7 +48,7 @@ impl ReshardingEventType { /// `next_shard_layout`, otherwise returns `None`. pub fn from_shard_layout( next_shard_layout: &ShardLayout, - resharding_hash: CryptoHash, + resharding_block: BlockInfo, ) -> Result, Error> { let log_and_error = |err_msg: &str| { error!(target: "resharding", ?next_shard_layout, err_msg); @@ -104,7 +104,7 @@ impl ReshardingEventType { left_child_shard, right_child_shard, boundary_account, - resharding_hash, + resharding_block, })); } _ => { @@ -123,6 +123,7 @@ impl ReshardingEventType { #[cfg(test)] mod tests { use super::*; + use near_primitives::hash::CryptoHash; use near_primitives::shard_layout::ShardLayout; use near_primitives::types::{AccountId, ShardId}; use near_store::ShardUId; @@ -138,7 +139,11 @@ mod tests { /// Verify that the correct type of resharding is deduced from a new shard layout. #[test] fn parse_event_type_from_shard_layout() { - let block = CryptoHash::hash_bytes(&[1]); + let block = BlockInfo { + hash: CryptoHash::hash_bytes(&[1]), + height: 1, + prev_hash: CryptoHash::hash_bytes(&[2]), + }; let s0 = ShardId::new(0); let s1 = ShardId::new(1); @@ -177,7 +182,7 @@ mod tests { parent_shard: ShardUId { version: 3, shard_id: 1 }, left_child_shard: ShardUId { version: 3, shard_id: 2 }, right_child_shard: ShardUId { version: 3, shard_id: 3 }, - resharding_hash: block, + resharding_block: block, boundary_account: account!("pp") })) ); diff --git a/chain/chain/src/resharding/manager.rs b/chain/chain/src/resharding/manager.rs index 5d3bd24e3fd..9249a30006d 100644 --- a/chain/chain/src/resharding/manager.rs +++ b/chain/chain/src/resharding/manager.rs @@ -15,6 +15,7 @@ use near_primitives::hash::CryptoHash; use near_primitives::shard_layout::{get_block_shard_uid, ShardLayout}; use near_primitives::types::chunk_extra::ChunkExtra; use near_store::adapter::{StoreAdapter, StoreUpdateAdapter}; +use near_store::flat::BlockInfo; use near_store::trie::mem::mem_trie_update::TrackingMode; use near_store::trie::ops::resharding::RetainMode; use near_store::trie::TrieRecorder; @@ -83,8 +84,13 @@ impl ReshardingManager { return Ok(()); } + let block_info = BlockInfo { + hash: *block.hash(), + height: block.header().height(), + prev_hash: *block.header().prev_hash(), + }; let resharding_event_type = - ReshardingEventType::from_shard_layout(&next_shard_layout, *block_hash)?; + ReshardingEventType::from_shard_layout(&next_shard_layout, block_info)?; match resharding_event_type { Some(ReshardingEventType::SplitShard(split_shard_event)) => { self.split_shard( diff --git a/chain/chain/src/stateless_validation/chunk_validation.rs b/chain/chain/src/stateless_validation/chunk_validation.rs index 4aa7df2b98a..cf89a9efe87 100644 --- a/chain/chain/src/stateless_validation/chunk_validation.rs +++ b/chain/chain/src/stateless_validation/chunk_validation.rs @@ -19,7 +19,7 @@ use near_chain_primitives::Error; use near_epoch_manager::EpochManagerAdapter; use near_pool::TransactionGroupIteratorWrapper; use near_primitives::apply::ApplyChunkReason; -use near_primitives::block::Block; +use near_primitives::block::{Block, BlockHeader}; use near_primitives::checked_feature; use near_primitives::hash::{hash, CryptoHash}; use near_primitives::merkle::merklize; @@ -33,6 +33,7 @@ use near_primitives::transaction::SignedTransaction; use near_primitives::types::chunk_extra::ChunkExtra; use near_primitives::types::{AccountId, ProtocolVersion, ShardId, ShardIndex}; use 
near_primitives::utils::compression::CompressedData; +use near_store::flat::BlockInfo; use near_store::trie::ops::resharding::RetainMode; use near_store::{PartialStorage, Trie}; use std::collections::HashMap; @@ -202,7 +203,7 @@ fn get_state_witness_block_range( if let Some(transition) = get_resharding_transition( epoch_manager, - prev_hash, + position.prev_block.header(), shard_uid, position.num_new_chunks_seen, )? { @@ -272,7 +273,7 @@ fn get_state_witness_block_range( /// so, returns the corresponding resharding transition parameters. fn get_resharding_transition( epoch_manager: &dyn EpochManagerAdapter, - prev_hash: &CryptoHash, + prev_header: &BlockHeader, shard_uid: ShardUId, num_new_chunks_seen: u32, ) -> Result, Error> { @@ -282,17 +283,22 @@ fn get_resharding_transition( return Ok(None); } - let shard_layout = epoch_manager.get_shard_layout_from_prev_block(prev_hash)?; - let prev_epoch_id = epoch_manager.get_prev_epoch_id_from_prev_block(prev_hash)?; + let shard_layout = epoch_manager.get_shard_layout_from_prev_block(prev_header.hash())?; + let prev_epoch_id = epoch_manager.get_prev_epoch_id_from_prev_block(prev_header.hash())?; let prev_shard_layout = epoch_manager.get_shard_layout(&prev_epoch_id)?; - let block_has_new_shard_layout = - epoch_manager.is_next_block_epoch_start(prev_hash)? && shard_layout != prev_shard_layout; + let block_has_new_shard_layout = epoch_manager.is_next_block_epoch_start(prev_header.hash())? + && shard_layout != prev_shard_layout; if !block_has_new_shard_layout { return Ok(None); } - let params = match ReshardingEventType::from_shard_layout(&shard_layout, *prev_hash)? { + let block_info = BlockInfo { + hash: *prev_header.hash(), + height: prev_header.height(), + prev_hash: *prev_header.prev_hash(), + }; + let params = match ReshardingEventType::from_shard_layout(&shard_layout, block_info)? { Some(ReshardingEventType::SplitShard(params)) => params, None => return Ok(None), }; diff --git a/core/store/src/db.rs b/core/store/src/db.rs index dd41c33e8fd..37e3dc30841 100644 --- a/core/store/src/db.rs +++ b/core/store/src/db.rs @@ -262,7 +262,7 @@ pub trait Database: Sync + Send { /// If this is a test database, return a copy of the entire database. /// Otherwise return None. 
- fn copy_if_test(&self) -> Option> { + fn copy_if_test(&self, _columns_to_keep: Option<&[DBCol]>) -> Option> { None } } diff --git a/core/store/src/db/testdb.rs b/core/store/src/db/testdb.rs index 66dbb779bc6..3db1b5c4d1c 100644 --- a/core/store/src/db/testdb.rs +++ b/core/store/src/db/testdb.rs @@ -154,11 +154,16 @@ impl Database for TestDB { Ok(()) } - fn copy_if_test(&self) -> Option> { + fn copy_if_test(&self, columns_to_keep: Option<&[DBCol]>) -> Option> { let mut copy = Self::default(); { let mut db = copy.db.write().unwrap(); for (col, map) in self.db.read().unwrap().iter() { + if let Some(keep) = columns_to_keep { + if !keep.contains(&col) { + continue; + } + } let new_col = &mut db[col]; for (key, value) in map.iter() { new_col.insert(key.clone(), value.clone()); diff --git a/core/store/src/flat/manager.rs b/core/store/src/flat/manager.rs index 31cb9a93e39..da17564ccad 100644 --- a/core/store/src/flat/manager.rs +++ b/core/store/src/flat/manager.rs @@ -3,8 +3,6 @@ use crate::flat::{ BlockInfo, FlatStorageReadyStatus, FlatStorageReshardingStatus, FlatStorageStatus, POISONED_LOCK_ERR, }; -use crate::{DBCol, StoreAdapter}; -use near_primitives::block_header::BlockHeader; use near_primitives::errors::StorageError; use near_primitives::hash::CryptoHash; use near_primitives::shard_layout::ShardUId; @@ -99,34 +97,12 @@ impl FlatStorageManager { Ok(()) } - fn read_block_info(&self, hash: &CryptoHash) -> Result { - let header = self - .0 - .store - .store_ref() - .get_ser::(DBCol::BlockHeader, hash.as_ref()) - .map_err(|e| { - StorageError::StorageInconsistentState(format!( - "could not read block header {}: {:?}", - hash, e - )) - })? - .ok_or_else(|| { - StorageError::StorageInconsistentState(format!("block header {} not found", hash)) - })?; - Ok(BlockInfo { - hash: *header.hash(), - prev_hash: *header.prev_hash(), - height: header.height(), - }) - } - /// Sets the status to `Ready` if it's currently `Resharding(CatchingUp)` fn mark_flat_storage_ready(&self, shard_uid: ShardUId) -> Result<(), StorageError> { // Don't use Self::get_flat_storage_status() because there's no need to panic if this fails, since this is used // during state snapshotting where an error isn't critical to node operation. let status = self.0.store.get_flat_storage_status(shard_uid)?; - let catchup_flat_head = match status { + let flat_head = match status { FlatStorageStatus::Ready(_) => return Ok(()), FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp(flat_head)) => { flat_head @@ -138,7 +114,6 @@ impl FlatStorageManager { ))) } }; - let flat_head = self.read_block_info(&catchup_flat_head)?; let mut store_update = self.0.store.store_update(); store_update.set_flat_storage_status( shard_uid, @@ -311,9 +286,8 @@ impl FlatStorageManager { for shard_uid in shard_uids { match self.0.store.get_flat_storage_status(shard_uid)? 
{ FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp( - catchup_flat_head, + flat_head, )) => { - let flat_head = self.read_block_info(&catchup_flat_head)?; if let Some(Some(min_height)) = ret { ret = Some(Some(std::cmp::min(min_height, flat_head.height))); } else { diff --git a/core/store/src/flat/types.rs b/core/store/src/flat/types.rs index 87b718eb75b..d2bb0672a8d 100644 --- a/core/store/src/flat/types.rs +++ b/core/store/src/flat/types.rs @@ -1,4 +1,5 @@ use borsh::{BorshDeserialize, BorshSerialize}; +use near_primitives::block::Tip; use near_primitives::errors::StorageError; use near_primitives::hash::CryptoHash; use near_primitives::shard_layout::{ShardLayout, ShardUId}; @@ -19,6 +20,12 @@ impl BlockInfo { } } +impl From for BlockInfo { + fn from(tip: Tip) -> Self { + Self { hash: tip.last_block_hash, height: tip.height, prev_hash: tip.prev_block_hash } + } +} + #[derive(strum::AsRefStr, strum::Display, Debug, PartialEq, Eq, thiserror::Error)] pub enum FlatStorageError { /// This means we can't find a path from `flat_head` to the block. Includes @@ -148,8 +155,8 @@ pub enum FlatStorageReshardingStatus { /// This shard (child) is being built from state taken from its parent. CreatingChild, /// We apply deltas from disk until the head reaches final head. - /// Includes block hash of flat storage head. - CatchingUp(CryptoHash), + /// Includes block info for the flat storage head. + CatchingUp(BlockInfo), } /// Current step of fetching state to fill flat storage. @@ -187,8 +194,8 @@ pub struct ParentSplitParameters { pub right_child_shard: ShardUId, /// The new shard layout. pub shard_layout: ShardLayout, - /// Hash of the last block having the old shard layout. - pub resharding_hash: CryptoHash, + /// Info of the last block having the old shard layout. + pub resharding_block: BlockInfo, /// Parent's flat head state when the split began. 
pub flat_head: BlockInfo, } diff --git a/core/store/src/opener.rs b/core/store/src/opener.rs index 0e896d2bf4f..c981ad7fef5 100644 --- a/core/store/src/opener.rs +++ b/core/store/src/opener.rs @@ -602,7 +602,7 @@ pub fn checkpoint_hot_storage_and_cleanup_columns( let _span = tracing::info_span!(target: "state_snapshot", "checkpoint_hot_storage_and_cleanup_columns") .entered(); - if let Some(storage) = hot_store.storage.copy_if_test() { + if let Some(storage) = hot_store.storage.copy_if_test(columns_to_keep) { return Ok(NodeStorage::new(storage)); } let checkpoint_path = checkpoint_base_path.join("data"); diff --git a/tools/protocol-schema-check/res/protocol_schema.toml b/tools/protocol-schema-check/res/protocol_schema.toml index de2f59aa25b..a280f8e61dd 100644 --- a/tools/protocol-schema-check/res/protocol_schema.toml +++ b/tools/protocol-schema-check/res/protocol_schema.toml @@ -137,8 +137,8 @@ FlatStateDeltaMetadata = 3401366797 FlatStateValue = 83834662 FlatStorageCreationStatus = 3717607657 FlatStorageReadyStatus = 677315221 -FlatStorageReshardingStatus = 1231081276 -FlatStorageStatus = 4109699695 +FlatStorageReshardingStatus = 3384401701 +FlatStorageStatus = 3324269976 FunctionCallAction = 2405840012 FunctionCallError = 3652274053 FunctionCallPermission = 1517509673 @@ -163,7 +163,7 @@ MethodResolveError = 1206790835 MissingTrieValueContext = 2666011379 NextEpochValidatorInfo = 3660299258 NonDelegateAction = 3255205790 -ParentSplitParameters = 1570407998 +ParentSplitParameters = 1570710157 PartialEdgeInfo = 1350359189 PartialEncodedChunk = 3453254449 PartialEncodedChunkForwardMsg = 68012243 From 7792dc990143d8d36d9afbdf1609d72cb6d9f009 Mon Sep 17 00:00:00 2001 From: Dmytrol <46675332+Dimitrolito@users.noreply.github.com> Date: Fri, 20 Dec 2024 14:28:51 +0200 Subject: [PATCH 02/15] Fix typos in various Rust and TypeScript files (#12643) **What does this pull request do? Explain your changes.** This pull request addresses several typographical errors in various files within the repository. The following files were updated: - `futures.rs` (in `core/async/src/test_loop/`) - `metrics.rs` (in `core/o11y/src/`) - `types.rs` (in `core/primitives-core/src/`) - `zero_balance_account.rs` (in `integration-tests/src/tests/client/features/`) - `runner.rs` (in `runtime/near-vm-runner/src/`) **Specific updates:** - Fixed the usage of incorrect articles such as changing "a event-loop-based" to "an event-loop-based." - Corrected other minor typographical issues across different files. **How did you test each of these updates?** The changes were tested by reviewing the files for correctness, ensuring that the typographical issues were addressed without affecting the functionality of the code. No additional tests were run since these are purely textual corrections. 
**Does this pull request close any open issues?** **Checklist:** - [ ] Read the [contribution guide](./CONTRIBUTING.md) - [ ] `make` runs successfully - [ ] All tests in `./test.sh` pass - [ ] README and other documentation updated - [ ] [Pending changelog](./CHANGELOG_PENDING.md) updated --------- Co-authored-by: Aleksandr Logunov --- core/async/src/test_loop/futures.rs | 2 +- core/o11y/src/metrics.rs | 2 +- core/primitives-core/src/types.rs | 2 +- .../src/tests/client/features/zero_balance_account.rs | 2 +- runtime/near-vm-runner/src/runner.rs | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/async/src/test_loop/futures.rs b/core/async/src/test_loop/futures.rs index d9abadca3d6..b7ce7e2176e 100644 --- a/core/async/src/test_loop/futures.rs +++ b/core/async/src/test_loop/futures.rs @@ -6,7 +6,7 @@ //! To support this, pass test_loop.future_spawner() the &dyn FutureSpawner //! to any component that needs to spawn futures. //! -//! This causes any futures spawned during the test to end up as an callback in the +//! This causes any futures spawned during the test to end up as a callback in the //! test loop. The event will eventually be executed by the drive_futures function, //! which will drive the future until it is either suspended or completed. If suspended, //! then the waker of the future (called when the future is ready to resume) will place diff --git a/core/o11y/src/metrics.rs b/core/o11y/src/metrics.rs index f514d852335..ab26e059b74 100644 --- a/core/o11y/src/metrics.rs +++ b/core/o11y/src/metrics.rs @@ -8,7 +8,7 @@ //! `observe()` method to record durations (e.g., block processing time). //! - `IncCounter`: used to represent an ideally ever-growing, never-shrinking //! integer (e.g., number of block processing requests). -//! - `IntGauge`: used to represent an varying integer (e.g., number of +//! - `IntGauge`: used to represent a varying integer (e.g., number of //! attestations per block). //! //! ## Important diff --git a/core/primitives-core/src/types.rs b/core/primitives-core/src/types.rs index 5c7c673c456..2321afa5e32 100644 --- a/core/primitives-core/src/types.rs +++ b/core/primitives-core/src/types.rs @@ -54,7 +54,7 @@ pub type ProtocolVersion = u32; /// used instead. pub type ShardIndex = usize; -/// The shard identifier. It may be a arbitrary number - it does not need to be +/// The shard identifier. It may be an arbitrary number - it does not need to be /// a number in the range 0..NUM_SHARDS. The shard ids do not need to be /// sequential or contiguous. 
/// diff --git a/integration-tests/src/tests/client/features/zero_balance_account.rs b/integration-tests/src/tests/client/features/zero_balance_account.rs index f1499a60477..5bb1d146eba 100644 --- a/integration-tests/src/tests/client/features/zero_balance_account.rs +++ b/integration-tests/src/tests/client/features/zero_balance_account.rs @@ -339,7 +339,7 @@ fn test_storage_usage_components() { let config = config_store.get_config(PROTOCOL_VERSION); let account_overhead = config.fees.storage_usage_config.num_bytes_account as usize; let record_overhead = config.fees.storage_usage_config.num_extra_bytes_record as usize; - // The NEP proposes to fit 4 full access keys + 2 fn access keys in an zero balance account + // The NEP proposes to fit 4 full access keys + 2 fn access keys in a zero balance account let full_access = PUBLIC_KEY_STORAGE_USAGE + FULL_ACCESS_PERMISSION_STORAGE_USAGE + record_overhead; let fn_access = diff --git a/runtime/near-vm-runner/src/runner.rs b/runtime/near-vm-runner/src/runner.rs index b7be0586997..f8a178cae50 100644 --- a/runtime/near-vm-runner/src/runner.rs +++ b/runtime/near-vm-runner/src/runner.rs @@ -13,7 +13,7 @@ use std::sync::Arc; /// is to crash or maybe ban a peer and/or send a challenge. /// /// A `VMOutcome` is a graceful completion of a VM execution. It can also contain -/// an guest error message in the `aborted` field. But these are not errors in +/// a guest error message in the `aborted` field. But these are not errors in /// the real sense, those are just reasons why execution failed at some point. /// Such as when a smart contract code panics. /// Note that the fact that `VMOutcome` contains is tracked on the blockchain. From 71f581fcfafcd3c16b3a8420136258e52a313531 Mon Sep 17 00:00:00 2001 From: Moritz Zielke Date: Fri, 20 Dec 2024 15:29:12 +0100 Subject: [PATCH 03/15] test: update config adjustments for `benchmarknet` (#12659) Previously, for benchmarknet some fields of `CongestionControlConfig` and `WitnessConfig` had been modified. As these structs evolved, the modifications became outdated. This PR uses `test_disabled()` to generate entire `CongestionControlConfig` and `WitnessConfig` structs with all limits disabled. With this approach, fewer updates to benchmarknet adjustments should be required in the future. Note that this config is effective only when `chain_id: "benchmarknet"`, which should happen only in tests or benchmarks. 
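For reference, a minimal sketch of the new override (mirroring the diff below; the enclosing match on the chain id in `RuntimeConfigStore` is elided):

```rust
// For chain_id == "benchmarknet": clone the latest config and disable the limits
// wholesale, instead of patching individual fields that go stale as the structs evolve.
let mut config = RuntimeConfig::clone(config_store.get_config(PROTOCOL_VERSION));
config.congestion_control_config = CongestionControlConfig::test_disabled();
config.witness_config = WitnessConfig::test_disabled();
```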
--- core/parameters/src/config_store.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/core/parameters/src/config_store.rs b/core/parameters/src/config_store.rs index 895e3b59e00..10e3eddb858 100644 --- a/core/parameters/src/config_store.rs +++ b/core/parameters/src/config_store.rs @@ -1,4 +1,4 @@ -use crate::config::{CongestionControlConfig, RuntimeConfig}; +use crate::config::{CongestionControlConfig, RuntimeConfig, WitnessConfig}; use crate::parameter_table::{ParameterTable, ParameterTableDiff}; use crate::vm; use near_primitives_core::types::ProtocolVersion; @@ -147,11 +147,8 @@ impl RuntimeConfigStore { near_primitives_core::chains::BENCHMARKNET => { let mut config_store = Self::new(None); let mut config = RuntimeConfig::clone(config_store.get_config(PROTOCOL_VERSION)); - config.congestion_control_config.max_tx_gas = 10u64.pow(16); - config.congestion_control_config.min_tx_gas = 10u64.pow(16); - config.witness_config.main_storage_proof_size_soft_limit = 999_999_999_999_999; - config.witness_config.new_transactions_validation_state_size_soft_limit = - 999_999_999_999_999; + config.congestion_control_config = CongestionControlConfig::test_disabled(); + config.witness_config = WitnessConfig::test_disabled(); let mut wasm_config = vm::Config::clone(&config.wasm_config); wasm_config.limit_config.per_receipt_storage_proof_size_limit = 999_999_999_999_999; config.wasm_config = Arc::new(wasm_config); @@ -460,6 +457,6 @@ mod tests { fn test_benchmarknet_config() { let store = RuntimeConfigStore::for_chain_id(near_primitives_core::chains::BENCHMARKNET); let config = store.get_config(PROTOCOL_VERSION); - assert_eq!(config.witness_config.main_storage_proof_size_soft_limit, 999_999_999_999_999); + assert_eq!(config.witness_config.main_storage_proof_size_soft_limit, usize::MAX); } } From 33afc1d35dffb4660d8c3423385540004704cc09 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Sat, 21 Dec 2024 08:50:34 +0800 Subject: [PATCH 04/15] feat: fix chunk producer staking threshold (#12660) In case of **empty seats**, the seat price of cp used to depend on the number of existing shards (approximately `block_producer_threshold / num_shard`), but it is no longer the case. Hence the fix. cc @Longarithm @wacban --- chain/epoch-manager/src/tests/mod.rs | 6 +- .../epoch-manager/src/validator_selection.rs | 21 +-- core/chain-configs/src/test_genesis.rs | 38 ++++- core/primitives-core/src/version.rs | 6 +- .../fix_chunk_producer_stake_threshold.rs | 144 ++++++++++++++++++ .../test_loop/tests/fix_stake_threshold.rs | 10 +- integration-tests/src/test_loop/tests/mod.rs | 1 + 7 files changed, 205 insertions(+), 21 deletions(-) create mode 100644 integration-tests/src/test_loop/tests/fix_chunk_producer_stake_threshold.rs diff --git a/chain/epoch-manager/src/tests/mod.rs b/chain/epoch-manager/src/tests/mod.rs index a9dfb2f79a2..60a23c69bca 100644 --- a/chain/epoch-manager/src/tests/mod.rs +++ b/chain/epoch-manager/src/tests/mod.rs @@ -2514,13 +2514,13 @@ fn test_epoch_validators_cache() { #[test] fn test_chunk_producers() { let amount_staked = 1_000_000; - // Make sure that last validator has at least 160/1'000'000 / num_shards of stake. + // Make sure that last validator has at least 160/1'000'000 of stake. // We're running with 2 shards and test1 + test2 has 2'000'000 tokens - so chunk_only should have over 160. 
let validators = vec![ ("test1".parse().unwrap(), amount_staked), ("test2".parse().unwrap(), amount_staked), - ("chunk_only".parse().unwrap(), 200), - ("not_enough_producer".parse().unwrap(), 100), + ("chunk_only".parse().unwrap(), 321), + ("not_enough_producer".parse().unwrap(), 320), ]; // There are 2 shards, and 2 block producers seats. diff --git a/chain/epoch-manager/src/validator_selection.rs b/chain/epoch-manager/src/validator_selection.rs index 2b7bcc4afb1..7f081b5f783 100644 --- a/chain/epoch-manager/src/validator_selection.rs +++ b/chain/epoch-manager/src/validator_selection.rs @@ -373,12 +373,13 @@ fn select_chunk_producers( num_shards: u64, protocol_version: ProtocolVersion, ) -> (Vec, BinaryHeap, Balance) { - select_validators( - all_proposals, - max_num_selected, - min_stake_ratio * Ratio::new(1, num_shards as u128), - protocol_version, - ) + let min_stake_ratio = + if checked_feature!("stable", FixChunkProducerStakingThreshold, protocol_version) { + min_stake_ratio + } else { + min_stake_ratio * Ratio::new(1, num_shards as u128) + }; + select_validators(all_proposals, max_num_selected, min_stake_ratio, protocol_version) } // Takes the top N proposals (by stake), or fewer if there are not enough or the @@ -1037,10 +1038,10 @@ mod tests { // Given `epoch_info` and `proposals` above, the sample at a given height is deterministic. let height = 42; let expected_assignments = vec![ - vec![(4, 56), (1, 168), (2, 300), (3, 84), (0, 364)], - vec![(3, 70), (1, 300), (4, 42), (2, 266), (0, 308)], - vec![(4, 42), (1, 238), (3, 42), (0, 450), (2, 196)], - vec![(2, 238), (1, 294), (3, 64), (0, 378)], + vec![(2, 192), (0, 396), (1, 280)], + vec![(1, 216), (2, 264), (0, 396)], + vec![(0, 396), (2, 288), (1, 192)], + vec![(2, 256), (1, 312), (0, 312)], ]; assert_eq!(epoch_info.sample_chunk_validators(height), expected_assignments); } diff --git a/core/chain-configs/src/test_genesis.rs b/core/chain-configs/src/test_genesis.rs index a77fc646ff5..42cbcc9c76a 100644 --- a/core/chain-configs/src/test_genesis.rs +++ b/core/chain-configs/src/test_genesis.rs @@ -139,7 +139,7 @@ impl Default for TestEpochConfigBuilder { fishermen_threshold: FISHERMEN_THRESHOLD, protocol_upgrade_stake_threshold: PROTOCOL_UPGRADE_STAKE_THRESHOLD, minimum_stake_divisor: 10, - minimum_stake_ratio: Rational32::new(160i32, 1_000_000i32), + minimum_stake_ratio: Rational32::new(16i32, 1_000_000i32), chunk_producer_assignment_changes_limit: 5, shuffle_shard_assignment_for_chunk_producers: false, // consider them ineffective @@ -628,6 +628,42 @@ pub struct GenesisAndEpochConfigParams<'a> { /// Handy factory for building test genesis and epoch config store. Use it if it is enough to have /// one epoch config for your test. Otherwise, just use builders directly. 
+/// +/// ``` +/// use near_chain_configs::test_genesis::build_genesis_and_epoch_config_store; +/// use near_chain_configs::test_genesis::GenesisAndEpochConfigParams; +/// use near_chain_configs::test_genesis::ValidatorsSpec; +/// use near_primitives::shard_layout::ShardLayout; +/// use near_primitives::test_utils::create_test_signer; +/// use near_primitives::types::AccountId; +/// use near_primitives::types::AccountInfo; +/// +/// const ONE_NEAR: u128 = 1_000_000_000_000_000_000_000_000; +/// +/// let protocol_version = 73; +/// let epoch_length = 10; +/// let accounts = (0..6).map(|i| format!("test{}", i).parse().unwrap()).collect::>(); +/// let shard_layout = ShardLayout::multi_shard(6, 1); +/// let validators = vec![ +/// AccountInfo { +/// account_id: accounts[0].clone(), +/// public_key: create_test_signer(accounts[0].as_str()).public_key(), +/// amount: 62500 * ONE_NEAR, +/// }, +/// ]; +/// let validators_spec = ValidatorsSpec::raw(validators, 3, 3, 3); +/// let (genesis, epoch_config_store) = build_genesis_and_epoch_config_store( +/// GenesisAndEpochConfigParams { +/// protocol_version, +/// epoch_length, +/// accounts: &accounts, +/// shard_layout, +/// validators_spec, +/// }, +/// |genesis_builder| genesis_builder.genesis_height(10000).transaction_validity_period(1000), +/// |epoch_config_builder| epoch_config_builder.shuffle_shard_assignment_for_chunk_producers(true), +/// ); +/// ``` pub fn build_genesis_and_epoch_config_store<'a>( params: GenesisAndEpochConfigParams<'a>, customize_genesis_builder: impl FnOnce(TestGenesisBuilder) -> TestGenesisBuilder, diff --git a/core/primitives-core/src/version.rs b/core/primitives-core/src/version.rs index cb8e9b4e6fa..d9b15819502 100644 --- a/core/primitives-core/src/version.rs +++ b/core/primitives-core/src/version.rs @@ -121,6 +121,9 @@ pub enum ProtocolFeature { /// price - it reports as alpha * sum_stake instead of alpha * sum_stake / (1 - alpha), where /// alpha is min stake ratio FixStakingThreshold, + /// In case not all validator seats are occupied, the minimum seat price of a chunk producer + /// used to depend on the number of existing shards, which is no longer the case. + FixChunkProducerStakingThreshold, /// Charge for contract loading before it happens. #[cfg(feature = "protocol_feature_fix_contract_loading_cost")] FixContractLoadingCost, @@ -253,7 +256,8 @@ impl ProtocolFeature { | ProtocolFeature::StateStoredReceipt => 72, ProtocolFeature::ExcludeContractCodeFromStateWitness => 73, ProtocolFeature::FixStakingThreshold - | ProtocolFeature::RejectBlocksWithOutdatedProtocolVersions => 74, + | ProtocolFeature::RejectBlocksWithOutdatedProtocolVersions + | ProtocolFeature::FixChunkProducerStakingThreshold => 74, // This protocol version is reserved for use in resharding tests. An extra resharding // is simulated on top of the latest shard layout in production. 
Note that later diff --git a/integration-tests/src/test_loop/tests/fix_chunk_producer_stake_threshold.rs b/integration-tests/src/test_loop/tests/fix_chunk_producer_stake_threshold.rs new file mode 100644 index 00000000000..df8c2952456 --- /dev/null +++ b/integration-tests/src/test_loop/tests/fix_chunk_producer_stake_threshold.rs @@ -0,0 +1,144 @@ +use crate::test_loop::builder::TestLoopBuilder; +use crate::test_loop::env::TestLoopEnv; +use crate::test_loop::utils::validators::get_epoch_all_validators; +use crate::test_loop::utils::ONE_NEAR; +use near_async::test_loop::data::TestLoopData; +use near_async::time::Duration; +use near_chain_configs::test_genesis::build_genesis_and_epoch_config_store; +use near_chain_configs::test_genesis::GenesisAndEpochConfigParams; +use near_chain_configs::test_genesis::ValidatorsSpec; +use near_o11y::testonly::init_test_logger; +use near_primitives::shard_layout::ShardLayout; +use near_primitives::test_utils::create_test_signer; +use near_primitives::types::AccountId; +use near_primitives::types::AccountInfo; +use near_primitives_core::version::ProtocolFeature; + +#[test] +fn slow_test_fix_cp_stake_threshold() { + init_test_logger(); + + let protocol_version = ProtocolFeature::FixChunkProducerStakingThreshold.protocol_version() - 1; + let epoch_length = 10; + let accounts = + (0..6).map(|i| format!("test{}", i).parse().unwrap()).collect::>(); + let clients = accounts.iter().cloned().collect::>(); + let num_shards = 6; + let shard_layout = ShardLayout::multi_shard(num_shards, 1); + let validators = vec![ + AccountInfo { + account_id: accounts[0].clone(), + public_key: create_test_signer(accounts[0].as_str()).public_key(), + amount: 30 * 62500 * ONE_NEAR, + }, + AccountInfo { + account_id: accounts[1].clone(), + public_key: create_test_signer(accounts[1].as_str()).public_key(), + amount: 30 * 62500 * ONE_NEAR, + }, + AccountInfo { + account_id: accounts[2].clone(), + public_key: create_test_signer(accounts[2].as_str()).public_key(), + // cp min stake ratio was `(1 / 62500) / num_shards` before the fix + // stake is right at threshold, and proposal should not be approved + amount: ((30 + 30) * 62500 / 62500 / num_shards) as u128 * ONE_NEAR, + }, + AccountInfo { + account_id: accounts[3].clone(), + public_key: create_test_signer(accounts[3].as_str()).public_key(), + // stake is above threshold, so proposal should be approved + amount: ((30 + 30) * 62500 / 62500 / num_shards + 1) as u128 * ONE_NEAR, + }, + ]; + let validators_spec = ValidatorsSpec::raw(validators, 5, 5, 5); + let (genesis, epoch_config_store) = build_genesis_and_epoch_config_store( + GenesisAndEpochConfigParams { + protocol_version, + epoch_length, + accounts: &accounts, + shard_layout, + validators_spec, + }, + |genesis_builder| genesis_builder, + |epoch_config_builder| epoch_config_builder, + ); + + let TestLoopEnv { mut test_loop, datas: node_data, tempdir } = TestLoopBuilder::new() + .genesis(genesis) + .epoch_config_store(epoch_config_store.clone()) + .clients(clients) + .build(); + + let sender = node_data[0].client_sender.clone(); + let handle = sender.actor_handle(); + let client = &test_loop.data.get(&handle).client; + + // premise checks + let epoch_id = client + .epoch_manager + .get_epoch_id_from_prev_block(&client.chain.head().unwrap().last_block_hash) + .unwrap(); + let protocol_version = client.epoch_manager.get_epoch_protocol_version(&epoch_id).unwrap(); + let validators = get_epoch_all_validators(client); + assert!( + protocol_version < 
ProtocolFeature::FixChunkProducerStakingThreshold.protocol_version() + ); + assert_eq!( + epoch_config_store.get_config(protocol_version).shard_layout.num_shards(), + num_shards + ); + assert_eq!( + validators, + vec![String::from("test0"), String::from("test1"), String::from("test3")] + ); + + test_loop.run_until( + |test_loop_data: &mut TestLoopData| { + let client = &test_loop_data.get(&handle).client; + let head = client.chain.head().unwrap(); + let epoch_height = client + .epoch_manager + .get_epoch_height_from_prev_block(&head.prev_block_hash) + .unwrap(); + // ensure loop is exited because of condition is met + assert!(epoch_height < 3); + + let epoch_id = client + .epoch_manager + .get_epoch_id_from_prev_block(&client.chain.head().unwrap().last_block_hash) + .unwrap(); + let protocol_version = + client.epoch_manager.get_epoch_protocol_version(&epoch_id).unwrap(); + // exits when protocol version catches up with the fix + protocol_version >= ProtocolFeature::FixChunkProducerStakingThreshold.protocol_version() + }, + Duration::seconds(4 * epoch_length as i64), + ); + + test_loop.run_until( + |test_loop_data: &mut TestLoopData| { + let client = &test_loop_data.get(&handle).client; + let head = client.chain.head().unwrap(); + let epoch_height = client + .epoch_manager + .get_epoch_height_from_prev_block(&head.prev_block_hash) + .unwrap(); + // ensure loop is exited because of condition is met + assert!(epoch_height < 5); + + // threshold is raised to approximately 6x of the previous in this case as threshold is + // no longer divided by num_shards (6), so test3's proposal won't be approved + let validators = get_epoch_all_validators(client); + if validators.len() == 2 { + assert_eq!(validators, vec![String::from("test0"), String::from("test1")]); + true + } else { + false + } + }, + Duration::seconds(3 * epoch_length as i64), + ); + + TestLoopEnv { test_loop, datas: node_data, tempdir } + .shutdown_and_drain_remaining_events(Duration::seconds(20)); +} diff --git a/integration-tests/src/test_loop/tests/fix_stake_threshold.rs b/integration-tests/src/test_loop/tests/fix_stake_threshold.rs index 89c25b43e50..c7ccbb22d92 100644 --- a/integration-tests/src/test_loop/tests/fix_stake_threshold.rs +++ b/integration-tests/src/test_loop/tests/fix_stake_threshold.rs @@ -88,9 +88,8 @@ fn slow_test_fix_validator_stake_threshold() { // prior to threshold fix // threshold = min_stake_ratio * total_stake // = (1 / 62_500) * total_stake - // TODO Chunk producer stake threshold is dependent on the number of shards, which should no - // longer be the case. Get rid of num_shards once protocol is updated to correct the ratio. - assert_eq!(epoch_info.seat_price() * num_shards as u128 / ONE_NEAR, 600_000); + // Chunk producer stake threshold used to be dependent on the number of shards. 
+ assert_eq!(epoch_info.seat_price() / ONE_NEAR, 600_000 / num_shards as u128); test_loop.run_until( |test_loop_data: &mut TestLoopData| { @@ -112,12 +111,11 @@ fn slow_test_fix_validator_stake_threshold() { client.epoch_manager.get_epoch_protocol_version(&epoch_id).unwrap(); if protocol_version >= ProtocolFeature::FixStakingThreshold.protocol_version() { let epoch_info = client.epoch_manager.get_epoch_info(&epoch_id).unwrap(); - let num_shards = - epoch_config_store.get_config(protocol_version).shard_layout.num_shards(); // after threshold fix // threshold = min_stake_ratio * total_stake / (1 - min_stake_ratio) // = (1 / 62_500) * total_stake / (62_499 / 62_500) - assert_eq!(epoch_info.seat_price() * num_shards as u128 / ONE_NEAR, 600_001); + // = total_stake / 62_499 + assert_eq!(epoch_info.seat_price() / ONE_NEAR, total_stake / 62_499 / ONE_NEAR); true } else { false diff --git a/integration-tests/src/test_loop/tests/mod.rs b/integration-tests/src/test_loop/tests/mod.rs index 79808e7b317..4fda42f66d7 100644 --- a/integration-tests/src/test_loop/tests/mod.rs +++ b/integration-tests/src/test_loop/tests/mod.rs @@ -7,6 +7,7 @@ mod contract_distribution_cross_shard; mod contract_distribution_simple; mod create_delete_account; mod epoch_sync; +mod fix_chunk_producer_stake_threshold; mod fix_min_stake_ratio; mod fix_stake_threshold; mod in_memory_tries; From 9623aa3a5c0e185d46bd5690b2bc37c11c2e9015 Mon Sep 17 00:00:00 2001 From: Romashka Date: Tue, 24 Dec 2024 17:01:31 +0200 Subject: [PATCH 05/15] typo-Update chain.rs (#12665) # Fix: Corrected typo in source file ## Changes - `chain/chain/src/chain.rs:3918`: "preprocesing" corrected to "preprocessing". ## Purpose - Improved code accuracy and ensured professionalism by fixing the typo. --- chain/chain/src/chain.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index b8029caa135..77fa1a00aa8 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -3915,7 +3915,7 @@ impl Chain { } /// Function to check whether we need to create a new snapshot while processing the current block - /// Note that this functions is called as a part of block preprocesing, so the head is not updated to current block + /// Note that this functions is called as a part of block preprocessing, so the head is not updated to current block fn should_make_or_delete_snapshot(&mut self) -> Result { // head value is that of the previous block, i.e. curr_block.prev_hash let head = self.head()?; From 602d5768acae7f421d05730e67d1297196e82c3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Chuda=C5=9B?= <18039094+staffik@users.noreply.github.com> Date: Wed, 25 Dec 2024 09:16:18 +0100 Subject: [PATCH 06/15] fix(testloop): set epoch length in testloop (#12664) Some resharding tests failed if an RPC node was used to submit transactions. It worked without RPC because `client0` was used by default, and it turned out this client was a chunk validator for the child shard after resharding. It did not work with RPC (or any client other than `client0`) because a transaction near epoch boundary was not forwarded to `client0`. It was not forwarded because `config.epoch_length` was not properly set in testloop. It has default value `10`, while resharding tests use `6`. That caused `get_next_epoch_id_if_at_boundary` to return different result that expected. In consequence, transaction was not sent to `client0`, which was a chunk producer for next epoch. 
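As a minimal sketch, the fix itself is the one-line assignment in the testloop builder (mirroring the diff in `integration-tests/src/test_loop/builder.rs` below; the surrounding builder code is elided):

```rust
let mut client_config = ClientConfig::test(true, 600, 2000, 4, is_archival, true, false);
// Take the epoch length from the test genesis instead of leaving the default (10),
// so that epoch-boundary checks such as `get_next_epoch_id_if_at_boundary` agree with
// the epoch length the resharding tests actually use (6).
client_config.epoch_length = genesis.config.epoch_length;
```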
--- chain/chunks/src/client.rs | 2 +- chain/client/src/client.rs | 2 +- integration-tests/src/test_loop/builder.rs | 1 + .../src/test_loop/tests/resharding_v3.rs | 13 +++++-------- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/chain/chunks/src/client.rs b/chain/chunks/src/client.rs index 0a334e89cf2..efccebb3a36 100644 --- a/chain/chunks/src/client.rs +++ b/chain/chunks/src/client.rs @@ -122,7 +122,7 @@ impl ShardedTransactionPool { /// the new shard layout. /// It works by emptying the pools for old shard uids and re-inserting the /// transactions back to the pool with the new shard uids. - /// TODO check if this logic works in resharding V3 + /// TODO(resharding) check if this logic works in resharding V3 pub fn reshard(&mut self, old_shard_layout: &ShardLayout, new_shard_layout: &ShardLayout) { tracing::debug!( target: "resharding", diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index 57bb66497f4..00dc12711ac 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -1684,7 +1684,7 @@ impl Client { // layout is changing we need to reshard the transaction pool. // TODO make sure transactions don't get added for the old shard // layout after the pool resharding - // TODO check if this logic works in resharding V3 + // TODO(resharding) check if this logic works in resharding V3 if self.epoch_manager.is_next_block_epoch_start(&block_hash).unwrap_or(false) { let new_shard_layout = self.epoch_manager.get_shard_layout_from_prev_block(&block_hash); diff --git a/integration-tests/src/test_loop/builder.rs b/integration-tests/src/test_loop/builder.rs index ae89b2eb31b..436eb230501 100644 --- a/integration-tests/src/test_loop/builder.rs +++ b/integration-tests/src/test_loop/builder.rs @@ -503,6 +503,7 @@ impl TestLoopBuilder { let genesis = self.genesis.as_ref().unwrap(); let epoch_config_store = self.epoch_config_store.as_ref().unwrap(); let mut client_config = ClientConfig::test(true, 600, 2000, 4, is_archival, true, false); + client_config.epoch_length = genesis.config.epoch_length; client_config.max_block_wait_delay = Duration::seconds(6); client_config.state_sync_enabled = true; client_config.state_sync_external_timeout = Duration::milliseconds(100); diff --git a/integration-tests/src/test_loop/tests/resharding_v3.rs b/integration-tests/src/test_loop/tests/resharding_v3.rs index 2f4e2045c87..d806861532c 100644 --- a/integration-tests/src/test_loop/tests/resharding_v3.rs +++ b/integration-tests/src/test_loop/tests/resharding_v3.rs @@ -316,6 +316,7 @@ fn call_burn_gas_contract( receiver_ids: Vec, gas_burnt_per_call: Gas, ) -> LoopActionFn { + // Must be less than epoch length, otherwise transactions won't be checked. 
const TX_CHECK_BLOCKS_AFTER_RESHARDING: u64 = 5; const CALLS_PER_BLOCK_HEIGHT: usize = 5; @@ -952,8 +953,7 @@ fn test_resharding_v3_delayed_receipts_right_child() { ReceiptKind::Delayed, )) .allow_negative_refcount(true) - // TODO(resharding): test should work without changes to num_rpcs and track_all_shards - .num_rpcs(0) + // TODO(resharding): test should work without changes to track_all_shards .track_all_shards(true) .build(); test_resharding_v3_base(params); @@ -981,8 +981,7 @@ fn test_resharding_v3_split_parent_buffered_receipts_base(base_shard_layout_vers vec![account_in_left_child], ReceiptKind::Buffered, )) - // TODO(resharding): test should work without changes to num_rpcs and track_all_shards - .num_rpcs(0) + // TODO(resharding): test should work without changes to track_all_shards .track_all_shards(true) .build(); test_resharding_v3_base(params); @@ -1071,8 +1070,7 @@ fn test_resharding_v3_outgoing_receipts_from_splitted_shard() { vec![receiver_account], 5 * TGAS, )) - // TODO(resharding): test should work without changes to num_rpcs and track_all_shards - .num_rpcs(0) + // TODO(resharding): test should work without changes to track_all_shards .track_all_shards(true) .build(); test_resharding_v3_base(params); @@ -1146,8 +1144,7 @@ fn test_resharding_v3_yield_resume() { vec![account_in_left_child, account_in_right_child], ReceiptKind::PromiseYield, )) - // TODO(resharding): test should work without changes to num_rpcs and track_all_shards - .num_rpcs(0) + // TODO(resharding): test should work without changes to track_all_shards .track_all_shards(true) .build(); test_resharding_v3_base(params); From 4b2dd8eeb78705e3c88396ae2792ee1abc163734 Mon Sep 17 00:00:00 2001 From: Skylar Ray <137945430+sky-coderay@users.noreply.github.com> Date: Thu, 26 Dec 2024 00:06:41 +0200 Subject: [PATCH 07/15] chore: docs fix spelling issues (#12668) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fixed typo: "`accound `" → "Creates a new `account `" - Fixed typo: "`superfluouos `" → "Creates a new `superfluous `" - Fixed typo: "`alreay `" → "Creates a new `already `" --- tools/state-viewer/src/trie_iteration_benchmark.rs | 2 +- tools/themis/src/rules.rs | 2 +- tools/undo-block/src/lib.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/state-viewer/src/trie_iteration_benchmark.rs b/tools/state-viewer/src/trie_iteration_benchmark.rs index 2844b90716b..ba7d44622eb 100644 --- a/tools/state-viewer/src/trie_iteration_benchmark.rs +++ b/tools/state-viewer/src/trie_iteration_benchmark.rs @@ -242,7 +242,7 @@ impl TrieIterationBenchmarkCmd { col::DELAYED_RECEIPT_OR_INDICES => false, // Most columns use the ACCOUNT_DATA_SEPARATOR to indicate the end - // of the accound id in the trie key. For those columns the + // of the account id in the trie key. For those columns the // partial_parse_account_id method should be used. // The only exception is the ACCESS_KEY and dedicated method // partial_parse_account_id_from_access_key should be used. diff --git a/tools/themis/src/rules.rs b/tools/themis/src/rules.rs index aa4ed461a95..ea31eb47a10 100644 --- a/tools/themis/src/rules.rs +++ b/tools/themis/src/rules.rs @@ -403,7 +403,7 @@ pub fn recursively_publishable(workspace: &Workspace) -> anyhow::Result<()> { Ok(()) } -/// Ensure that top-level `Cargo.toml` does not contain superfluouos dependencies. +/// Ensure that top-level `Cargo.toml` does not contain superfluous dependencies. 
pub fn no_superfluous_deps(workspace: &Workspace) -> anyhow::Result<()> { let mut workspace_deps = BTreeSet::::new(); let mut read_deps = |manifest: &toml::value::Value, table_name: &'static str| { diff --git a/tools/undo-block/src/lib.rs b/tools/undo-block/src/lib.rs index 4f16a4ebb3a..1b9057c04c2 100644 --- a/tools/undo-block/src/lib.rs +++ b/tools/undo-block/src/lib.rs @@ -63,7 +63,7 @@ pub fn undo_only_block_head( tracing::info!(target: "neard", ?tail_height, ?current_head_height, ?current_header_height, "Trying to update head"); if current_head_height == tail_height { - tracing::info!(target: "neard", "Body head is alreay at the oldest block."); + tracing::info!(target: "neard", "Body head is already at the oldest block."); return Ok(()); } From 4b283cf5fb058884f3335e9f1a3661769051d01d Mon Sep 17 00:00:00 2001 From: chloefeal <188809157+chloefeal@users.noreply.github.com> Date: Sun, 29 Dec 2024 04:13:13 +0800 Subject: [PATCH 08/15] chore: fix some typos in comment (#12671) This PR focuses on correcting typos and improving clarity in documentation files. Thank you very much. --- core/store/src/metadata.rs | 2 +- docs/practices/workflows/benchmarking_synthetic_workloads.md | 2 +- integration-tests/src/test_loop/utils/trie_sanity.rs | 2 +- integration-tests/src/tests/runtime/test_yield_resume.rs | 2 +- pytest/tests/replay/README.md | 2 +- pytest/tests/sanity/split_storage.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/store/src/metadata.rs b/core/store/src/metadata.rs index ebff29084c9..0f685ab850f 100644 --- a/core/store/src/metadata.rs +++ b/core/store/src/metadata.rs @@ -56,7 +56,7 @@ pub(super) struct DbMetadata { impl DbMetadata { /// Reads metadata from the database. This method enforces the invariant - /// that version and kind must alwasy be set. + /// that version and kind must always be set. /// /// If the database version is not present, returns an error. Similarly, if /// database version is ≥ [`DB_VERSION_WITH_KIND`] but the kind is not diff --git a/docs/practices/workflows/benchmarking_synthetic_workloads.md b/docs/practices/workflows/benchmarking_synthetic_workloads.md index 0f76f18f58e..55dc4048974 100644 --- a/docs/practices/workflows/benchmarking_synthetic_workloads.md +++ b/docs/practices/workflows/benchmarking_synthetic_workloads.md @@ -11,7 +11,7 @@ This approach has the following benefits: The main drawbacks of synthetic benchmarks are: -- Drawing conclusions is limited as real world traffic is not homogenous. +- Drawing conclusions is limited as real world traffic is not homogeneous. - Calibrating traffic generation parameters can be cumbersome. The tooling for synthetic benchmarks is available in [`benchmarks/bm-synth`](../../../benchmarks/bm-synth). 
diff --git a/integration-tests/src/test_loop/utils/trie_sanity.rs b/integration-tests/src/test_loop/utils/trie_sanity.rs index c3d5147742d..56e102339bf 100644 --- a/integration-tests/src/test_loop/utils/trie_sanity.rs +++ b/integration-tests/src/test_loop/utils/trie_sanity.rs @@ -118,7 +118,7 @@ impl TrieSanityCheck { check_shard_uids } - // Check trie sanity and keep track of which shards were succesfully fully checked + // Check trie sanity and keep track of which shards were successfully fully checked pub fn assert_state_sanity(&mut self, clients: &[&Client], new_num_shards: NumShards) { for client in clients { let signer = client.validator_signer.get(); diff --git a/integration-tests/src/tests/runtime/test_yield_resume.rs b/integration-tests/src/tests/runtime/test_yield_resume.rs index c108796a0ce..8df6bff83aa 100644 --- a/integration-tests/src/tests/runtime/test_yield_resume.rs +++ b/integration-tests/src/tests/runtime/test_yield_resume.rs @@ -162,7 +162,7 @@ fn resume_without_yield() { ) .unwrap(); - // expect the execution to suceed, but return 'false' + // expect the execution to succeed, but return 'false' assert_eq!( res.status, FinalExecutionStatus::SuccessValue(vec![0u8]), diff --git a/pytest/tests/replay/README.md b/pytest/tests/replay/README.md index 7368c2c6cb6..3c13422a3d9 100644 --- a/pytest/tests/replay/README.md +++ b/pytest/tests/replay/README.md @@ -15,7 +15,7 @@ Prerequisites: In order to set up and launch replay, we take the following steps: -Make sure you have the right enviroment variables: +Make sure you have the right environment variables: ```shell export PYTHONPATH=./pytest/lib ``` diff --git a/pytest/tests/sanity/split_storage.py b/pytest/tests/sanity/split_storage.py index 6b60193b6cc..31a808e0251 100644 --- a/pytest/tests/sanity/split_storage.py +++ b/pytest/tests/sanity/split_storage.py @@ -125,7 +125,7 @@ def step2_archival_node_sync_test(self): logger.info(f"Starting the archival <- split storage sync test") # Archival nodes do not run state sync. This means that if peers - # ran away further than epoch_lenght * gc_epoch_num, archival nodes + # ran away further than epoch_length * gc_epoch_num, archival nodes # will not be able to further sync. In practice it means we need a long # enough epoch_length or more gc_epoch_num to keep. 
epoch_length = 10

From 9b870f9f4eb86344d23c92fbbf2dbf91683807eb Mon Sep 17 00:00:00 2001
From: planetBoy <140164174+Guayaba221@users.noreply.github.com>
Date: Thu, 2 Jan 2025 09:12:10 +0100
Subject: [PATCH 09/15] fix spelling issues (#12670)

**Setup up - Set up**
**reasong - reason**
---
 tools/mock-node/src/setup.rs             | 2 +-
 tools/state-viewer/src/replay_headers.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/mock-node/src/setup.rs b/tools/mock-node/src/setup.rs
index f30d6212564..c379e0ed4ff 100644
--- a/tools/mock-node/src/setup.rs
+++ b/tools/mock-node/src/setup.rs
@@ -102,7 +102,7 @@ pub struct MockNode {
     pub rpc_client: JsonRpcClient,
 }
 
-/// Setup up a mock node, including setting up
+/// Set up a mock node, including setting up
 /// a MockPeerManagerActor and a ClientActor and a ViewClientActor
 /// `client_home_dir`: home dir for the new client
 /// `network_home_dir`: home dir that contains the pre-generated chain history, will be used
diff --git a/tools/state-viewer/src/replay_headers.rs b/tools/state-viewer/src/replay_headers.rs
index 8c75920eb46..86bfb177c5c 100644
--- a/tools/state-viewer/src/replay_headers.rs
+++ b/tools/state-viewer/src/replay_headers.rs
@@ -201,7 +201,7 @@ fn get_validator_summary(
     validator_to_summary
 }
 
-/// Returns a mapping from validator account id to the kickout reasong (from previous epoch).
+/// Returns a mapping from validator account id to the kickout reason (from previous epoch).
 fn get_validator_kickouts(
     validator_info: &EpochValidatorInfo,
 ) -> HashMap {

From edaccb6c9f6df50937e7eecce87a9ce909f1bc2b Mon Sep 17 00:00:00 2001
From: Radovenchyk
Date: Thu, 2 Jan 2025 10:12:30 +0200
Subject: [PATCH 10/15] Docs: Readme (#12663)

Hey. I added a link that was missing in the documentation
---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3c8b273870d..1cc55f352a3 100644
--- a/README.md
+++ b/README.md
@@ -11,13 +11,14 @@
 ## Reference implementation of NEAR Protocol
 
-![Buildkite](https://img.shields.io/buildkite/0eae07525f8e44a19b48fa937813e2c21ee04aa351361cd851)
+[![Buildkite](https://img.shields.io/buildkite/0eae07525f8e44a19b48fa937813e2c21ee04aa351361cd851)][buildkite]
 ![Stable Status][stable-release]
 ![Prerelease Status][prerelease]
 [![codecov][codecov-badge]][codecov-url]
 [![Discord chat][discord-badge]][discord-url]
 [![Telegram Group][telegram-badge]][telegram-url]
 
+[buildkite]: https://github.com/near/nearcore/actions
 [stable-release]: https://img.shields.io/github/v/release/nearprotocol/nearcore?label=stable
 [prerelease]: https://img.shields.io/github/v/release/nearprotocol/nearcore?include_prereleases&label=prerelease
 [ci-badge-master]: https://badge.buildkite.com/a81147cb62c585cc434459eedd1d25e521453120ead9ee6c64.svg?branch=master

From 2338f72f37b328da2d884caef369f4114a825396 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adam=20Chuda=C5=9B?= <18039094+staffik@users.noreply.github.com>
Date: Thu, 2 Jan 2025 13:49:48 +0100
Subject: [PATCH 11/15] fix(resharding): Increase epoch length for shard shuffling (#12674)

Some resharding tests were failing with single shard tracking enabled because the epoch length was slightly too short. Under the hood, if we are close to the epoch length (4 blocks or less), we forward transactions to the chunk producer for the next epoch too. But if the epoch length is 6, it means the chunk producer has only 2 blocks to catch up before it is supposed to start accepting transactions for the new shard.
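As a rough sketch of that timing budget (`DEFAULT_EPOCH_LENGTH` and `INCREASED_EPOCH_LENGTH` are the constants this PR introduces below; `TX_FORWARD_HORIZON` and the helper are made-up names used only to illustrate the arithmetic):

```rust
// Illustration only: blocks left for a reassigned chunk producer to finish
// catch up before forwarded transactions for the new shard start arriving.
const TX_FORWARD_HORIZON: u64 = 4; // "close to epoch length (4 blocks or less)"

fn catchup_window(epoch_length: u64) -> u64 {
    epoch_length.saturating_sub(TX_FORWARD_HORIZON)
}

fn main() {
    assert_eq!(catchup_window(6), 2); // DEFAULT_EPOCH_LENGTH: only 2 blocks of slack
    assert_eq!(catchup_window(8), 4); // INCREASED_EPOCH_LENGTH: twice the slack
}
```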
In such case, `TX_CHECK_BLOCKS_AFTER_RESHARDING` needs also be increased. **Changes:** This PR sets `INCREASED_EPOCH_LENGTH: 8` for these tests. For remaining tests, I keep the old `DEFAULT_EPOCH_LENGTH: 6`, to test potentially more corner cases, and some tests seem to be designed with this specific epoch length in mind. `TX_CHECK_BLOCKS_AFTER_RESHARDING ` is increased to almost max (`epoch_length - 2`). Loop actions are slightly refactored, introducing `LoopAction` to make sure these actions actually succeed before the test is over. It happened to me several times they did not, if some parameter was changed. --- .../src/test_loop/tests/resharding_v3.rs | 90 ++++++++++++------- .../src/test_loop/utils/loop_action.rs | 72 +++++++++++++++ integration-tests/src/test_loop/utils/mod.rs | 4 +- .../src/test_loop/utils/receipts.rs | 21 +++-- 4 files changed, 147 insertions(+), 40 deletions(-) create mode 100644 integration-tests/src/test_loop/utils/loop_action.rs diff --git a/integration-tests/src/test_loop/tests/resharding_v3.rs b/integration-tests/src/test_loop/tests/resharding_v3.rs index d806861532c..4c0864056f0 100644 --- a/integration-tests/src/test_loop/tests/resharding_v3.rs +++ b/integration-tests/src/test_loop/tests/resharding_v3.rs @@ -21,6 +21,7 @@ use std::sync::Arc; use crate::test_loop::builder::TestLoopBuilder; use crate::test_loop::env::{TestData, TestLoopEnv}; +use crate::test_loop::utils::loop_action::{LoopAction, LoopActionStatus}; use crate::test_loop::utils::receipts::{ check_receipts_presence_after_resharding_block, check_receipts_presence_at_resharding_block, ReceiptKind, @@ -35,7 +36,7 @@ use crate::test_loop::utils::transactions::{ use crate::test_loop::utils::trie_sanity::{ check_state_shard_uid_mapping_after_resharding, TrieSanityCheck, }; -use crate::test_loop::utils::{get_node_data, retrieve_client_actor, LoopActionFn, ONE_NEAR, TGAS}; +use crate::test_loop::utils::{get_node_data, retrieve_client_actor, ONE_NEAR, TGAS}; use assert_matches::assert_matches; use near_crypto::Signer; use near_parameters::{vm, RuntimeConfig, RuntimeConfigStore}; @@ -43,6 +44,17 @@ use near_primitives::test_utils::create_user_test_signer; use near_primitives::transaction::SignedTransaction; use near_primitives::views::{FinalExecutionStatus, QueryRequest}; +/// Default and minimal epoch length used in resharding tests. +const DEFAULT_EPOCH_LENGTH: u64 = 6; + +/// Increased epoch length that has to be used in some tests due to the delay caused by catch up. +/// +/// With shorter epoch length, a chunk producer might not finish catch up on time, +/// before it is supposed to accept transactions for the next epoch. +/// That would result in chunk producer rejecting a transaction +/// and later we would hit the `DBNotFoundErr("Transaction ...)` error in tests. +const INCREASED_EPOCH_LENGTH: u64 = 8; + #[derive(derive_builder::Builder)] #[builder(pattern = "owned", build_fn(skip))] #[allow(unused)] @@ -82,7 +94,7 @@ struct TestReshardingParameters { load_mem_tries_for_tracked_shards: bool, /// Custom behavior executed at every iteration of test loop. #[builder(setter(custom))] - loop_actions: Vec, + loop_actions: Vec, // When enabling shard shuffling with a short epoch length, sometimes a node might not finish // catching up by the end of the epoch, and then misses a chunk. This can be fixed by using a longer // epoch length, but it's good to also check what happens with shorter ones. 
@@ -105,7 +117,7 @@ struct TestReshardingParameters { impl TestReshardingParametersBuilder { fn build(self) -> TestReshardingParameters { - let epoch_length = self.epoch_length.unwrap_or(6); + let epoch_length = self.epoch_length.unwrap_or(DEFAULT_EPOCH_LENGTH); let num_accounts = self.num_accounts.unwrap_or(8); let num_clients = self.num_clients.unwrap_or(7); @@ -193,7 +205,7 @@ impl TestReshardingParametersBuilder { } } - fn add_loop_action(mut self, loop_action: LoopActionFn) -> Self { + fn add_loop_action(mut self, loop_action: LoopAction) -> Self { self.loop_actions.get_or_insert_default().push(loop_action); self } @@ -212,12 +224,12 @@ impl TestReshardingParametersBuilder { // Returns a callable function that, when invoked inside a test loop iteration, can force the creation of a chain fork. #[cfg(feature = "test_features")] -fn fork_before_resharding_block(double_signing: bool) -> LoopActionFn { +fn fork_before_resharding_block(double_signing: bool) -> LoopAction { use crate::test_loop::utils::retrieve_client_actor; use near_client::client_actor::AdvProduceBlockHeightSelection; - let done = Cell::new(false); - Box::new( + let (done, succeeded) = LoopAction::shared_success_flag(); + let action_fn = Box::new( move |node_datas: &[TestData], test_loop_data: &mut TestLoopData, client_account_id: AccountId| { @@ -248,17 +260,19 @@ fn fork_before_resharding_block(double_signing: bool) -> LoopActionFn { done.set(true); } }, - ) + ); + LoopAction::new(action_fn, succeeded) } -fn execute_money_transfers(account_ids: Vec) -> LoopActionFn { +fn execute_money_transfers(account_ids: Vec) -> LoopAction { const NUM_TRANSFERS_PER_BLOCK: usize = 20; let latest_height = Cell::new(0); let seed = rand::thread_rng().gen::(); println!("Random seed: {}", seed); - Box::new( + let (ran_transfers, succeeded) = LoopAction::shared_success_flag(); + let action_fn = Box::new( move |node_datas: &[TestData], test_loop_data: &mut TestLoopData, client_account_id: AccountId| { @@ -301,8 +315,10 @@ fn execute_money_transfers(account_ids: Vec) -> LoopActionFn { ); submit_tx(&node_datas, &client_account_id, tx); } + ran_transfers.set(true); }, - ) + ); + LoopAction::new(action_fn, succeeded) } /// Returns a loop action that invokes a costly method from a contract @@ -315,17 +331,20 @@ fn call_burn_gas_contract( signer_ids: Vec, receiver_ids: Vec, gas_burnt_per_call: Gas, -) -> LoopActionFn { - // Must be less than epoch length, otherwise transactions won't be checked. - const TX_CHECK_BLOCKS_AFTER_RESHARDING: u64 = 5; + epoch_length: u64, +) -> LoopAction { const CALLS_PER_BLOCK_HEIGHT: usize = 5; + // Set to a value large enough, so that transactions from the past epoch are settled. + // Must be less than epoch length, otherwise won't be triggered before the test is finished. + let tx_check_blocks_after_resharding = epoch_length - 2; let resharding_height = Cell::new(None); let nonce = Cell::new(102); let txs = Cell::new(vec![]); let latest_height = Cell::new(0); + let (checked_transactions, succeeded) = LoopAction::shared_success_flag(); - Box::new( + let action_fn = Box::new( move |node_datas: &[TestData], test_loop_data: &mut TestLoopData, client_account_id: AccountId| { @@ -341,7 +360,7 @@ fn call_burn_gas_contract( // After resharding: wait some blocks and check that all txs have been executed correctly. 
if let Some(height) = resharding_height.get() { - if tip.height > height + TX_CHECK_BLOCKS_AFTER_RESHARDING { + if tip.height > height + tx_check_blocks_after_resharding { for (tx, tx_height) in txs.take() { let tx_outcome = client_actor.client.chain.get_partial_transaction_result(&tx); @@ -350,6 +369,7 @@ fn call_burn_gas_contract( tracing::debug!(target: "test", ?tx_height, ?tx, ?status, "transaction status"); assert_matches!(status, FinalExecutionStatus::SuccessValue(_)); } + checked_transactions.set(true); } } else { if next_block_has_new_shard_layout(client_actor.client.epoch_manager.as_ref(), &tip) @@ -395,7 +415,8 @@ fn call_burn_gas_contract( } } }, - ) + ); + LoopAction::new(action_fn, succeeded) } /// Sends a promise-yield transaction before resharding. Then, if `call_resume` is `true` also sends @@ -408,15 +429,16 @@ fn call_promise_yield( call_resume: bool, signer_ids: Vec, receiver_ids: Vec, -) -> LoopActionFn { +) -> LoopAction { let resharding_height: Cell> = Cell::new(None); let txs = Cell::new(vec![]); let latest_height = Cell::new(0); let promise_txs_sent = Cell::new(false); let nonce = Cell::new(102); let yield_payload = vec![]; + let (checked_transactions, succeeded) = LoopAction::shared_success_flag(); - Box::new( + let action_fn = Box::new( move |node_datas: &[TestData], test_loop_data: &mut TestLoopData, client_account_id: AccountId| { @@ -476,6 +498,7 @@ fn call_promise_yield( tracing::debug!(target: "test", ?tx_height, ?tx, ?status, "transaction status"); assert_matches!(status, FinalExecutionStatus::SuccessValue(_)); } + checked_transactions.set(true); } (Some(_resharding), _latest) => {} // Resharding didn't happen in the past. @@ -523,7 +546,8 @@ fn call_promise_yield( } } }, - ) + ); + LoopAction::new(action_fn, succeeded) } fn get_base_shard_layout(version: u64) -> ShardLayout { @@ -738,7 +762,7 @@ fn test_resharding_v3_base(params: TestReshardingParameters) { params .loop_actions .iter() - .for_each(|action| action(&env.datas, test_loop_data, client_account_id.clone())); + .for_each(|action| action.call(&env.datas, test_loop_data, client_account_id.clone())); let clients = client_handles.iter().map(|handle| &test_loop_data.get(handle).client).collect_vec(); @@ -774,6 +798,9 @@ fn test_resharding_v3_base(params: TestReshardingParameters) { println!("State after resharding:"); print_and_assert_shard_accounts(&clients, &tip); check_state_shard_uid_mapping_after_resharding(&client, parent_shard_uid); + for loop_action in ¶ms.loop_actions { + assert_matches!(loop_action.get_status(), LoopActionStatus::Succeeded); + } return true; }; @@ -907,7 +934,7 @@ fn test_resharding_v3_shard_shuffling_intense() { let chunk_ranges_to_drop = HashMap::from([(0, -1..2), (1, -3..0), (2, -3..3), (3, 0..1)]); let params = TestReshardingParametersBuilder::default() .num_accounts(8) - .epoch_length(8) + .epoch_length(INCREASED_EPOCH_LENGTH) .shuffle_shard_assignment_for_chunk_producers(true) .chunk_ranges_to_drop(chunk_ranges_to_drop) .add_loop_action(execute_money_transfers( @@ -927,6 +954,7 @@ fn test_resharding_v3_delayed_receipts_left_child() { vec![account.clone()], vec![account.clone()], 275 * TGAS, + DEFAULT_EPOCH_LENGTH, )) .add_loop_action(check_receipts_presence_at_resharding_block( vec![account], @@ -947,14 +975,14 @@ fn test_resharding_v3_delayed_receipts_right_child() { vec![account.clone()], vec![account.clone()], 275 * TGAS, + INCREASED_EPOCH_LENGTH, )) .add_loop_action(check_receipts_presence_at_resharding_block( vec![account], ReceiptKind::Delayed, )) 
.allow_negative_refcount(true) - // TODO(resharding): test should work without changes to track_all_shards - .track_all_shards(true) + .epoch_length(INCREASED_EPOCH_LENGTH) .build(); test_resharding_v3_base(params); } @@ -972,6 +1000,7 @@ fn test_resharding_v3_split_parent_buffered_receipts_base(base_shard_layout_vers vec![account_in_left_child.clone(), account_in_right_child], vec![receiver_account], 10 * TGAS, + INCREASED_EPOCH_LENGTH, )) .add_loop_action(check_receipts_presence_at_resharding_block( vec![account_in_parent], @@ -981,8 +1010,7 @@ fn test_resharding_v3_split_parent_buffered_receipts_base(base_shard_layout_vers vec![account_in_left_child], ReceiptKind::Buffered, )) - // TODO(resharding): test should work without changes to track_all_shards - .track_all_shards(true) + .epoch_length(INCREASED_EPOCH_LENGTH) .build(); test_resharding_v3_base(params); } @@ -1015,6 +1043,7 @@ fn test_resharding_v3_buffered_receipts_towards_splitted_shard_base( vec![account_in_stable_shard.clone()], vec![account_in_left_child, account_in_right_child], 10 * TGAS, + DEFAULT_EPOCH_LENGTH, )) .add_loop_action(check_receipts_presence_at_resharding_block( vec![account_in_stable_shard.clone()], @@ -1052,6 +1081,7 @@ fn test_resharding_v3_outgoing_receipts_towards_splitted_shard() { vec![account_1_in_stable_shard, account_2_in_stable_shard], vec![receiver_account], 5 * TGAS, + DEFAULT_EPOCH_LENGTH, )) .build(); test_resharding_v3_base(params); @@ -1069,9 +1099,9 @@ fn test_resharding_v3_outgoing_receipts_from_splitted_shard() { vec![account_in_left_child, account_in_right_child], vec![receiver_account], 5 * TGAS, + INCREASED_EPOCH_LENGTH, )) - // TODO(resharding): test should work without changes to track_all_shards - .track_all_shards(true) + .epoch_length(INCREASED_EPOCH_LENGTH) .build(); test_resharding_v3_base(params); } @@ -1108,7 +1138,7 @@ fn test_resharding_v3_slower_post_processing_tasks() { test_resharding_v3_base( TestReshardingParametersBuilder::default() .delay_flat_state_resharding(2) - .epoch_length(13) + .epoch_length(INCREASED_EPOCH_LENGTH) .build(), ); } @@ -1119,7 +1149,7 @@ fn test_resharding_v3_shard_shuffling_slower_post_processing_tasks() { let params = TestReshardingParametersBuilder::default() .shuffle_shard_assignment_for_chunk_producers(true) .delay_flat_state_resharding(2) - .epoch_length(13) + .epoch_length(INCREASED_EPOCH_LENGTH) .build(); test_resharding_v3_base(params); } diff --git a/integration-tests/src/test_loop/utils/loop_action.rs b/integration-tests/src/test_loop/utils/loop_action.rs new file mode 100644 index 00000000000..cd28493a07a --- /dev/null +++ b/integration-tests/src/test_loop/utils/loop_action.rs @@ -0,0 +1,72 @@ +use std::cell::Cell; +use std::rc::Rc; + +use near_async::test_loop::data::TestLoopData; +use near_primitives::types::AccountId; + +use crate::test_loop::env::TestData; + +/// Signature of functions callable from inside the inner loop of a testloop test. +pub(crate) type LoopActionFn = Box; + +/// A wrapper for a callable (action) to be used inside a testloop test. +/// +/// The action has two failure modes: +/// 1. It can fail during an execution of the callable (e.g. assert failure). +/// 2. The `succeeded` has never been set by the action and the testloop test is over. +/// +/// The expectation is that `succeeded` would eventually be set to true by some iteration of `action_fn`. 
+pub(crate) struct LoopAction { + action_fn: LoopActionFn, + started: Cell, + succeeded: Rc>, +} + +impl LoopAction { + /// Returns a pair of pointers to the same flag, initially set to false. + /// To be used for a succees flag that is shared between `LoopAction` and its `LoopActionFn`. + pub fn shared_success_flag() -> (Rc>, Rc>) { + let flag = Rc::new(Cell::new(false)); + (flag.clone(), flag) + } +} + +/// Current status for a `LoopAction`. +#[derive(Debug)] +pub enum LoopActionStatus { + /// The action has never been called. + NotStarted, + /// The action has been called, but it has not set the `succeeded` flag yet. + Started, + /// The `succeeded` flag has been set. + Succeeded, +} + +impl LoopAction { + /// The `succeeded` flag should be shared with `action_fn` that will update the flag at some point. + pub fn new(action_fn: LoopActionFn, succeeded: Rc>) -> LoopAction { + LoopAction { action_fn, started: Cell::new(false), succeeded } + } + + /// Call the action callable with provided arguments. + pub fn call( + &self, + test_data: &[TestData], + test_loop_data: &mut TestLoopData, + account_id: AccountId, + ) { + self.started.set(true); + (self.action_fn)(test_data, test_loop_data, account_id) + } + + /// Return the current status of the loop action. + pub fn get_status(&self) -> LoopActionStatus { + if self.succeeded.get() { + return LoopActionStatus::Succeeded; + } + if self.started.get() { + return LoopActionStatus::Started; + } + LoopActionStatus::NotStarted + } +} diff --git a/integration-tests/src/test_loop/utils/mod.rs b/integration-tests/src/test_loop/utils/mod.rs index d71235a051a..792899d6a45 100644 --- a/integration-tests/src/test_loop/utils/mod.rs +++ b/integration-tests/src/test_loop/utils/mod.rs @@ -4,6 +4,7 @@ use near_client::client_actor::ClientActorInner; use near_primitives::types::AccountId; pub(crate) mod contract_distribution; +pub(crate) mod loop_action; pub(crate) mod network; pub(crate) mod receipts; pub(crate) mod setups; @@ -22,9 +23,6 @@ pub(crate) fn get_head_height(env: &mut TestLoopEnv) -> u64 { client.chain.head().unwrap().height } -/// Signature of functions callable from inside the inner loop of a test loop test. -pub(crate) type LoopActionFn = Box; - /// Returns the test data of for the node with the given account id. 
pub(crate) fn get_node_data<'a>( node_datas: &'a [TestData], diff --git a/integration-tests/src/test_loop/utils/receipts.rs b/integration-tests/src/test_loop/utils/receipts.rs index fd844eb53db..c364e944cc2 100644 --- a/integration-tests/src/test_loop/utils/receipts.rs +++ b/integration-tests/src/test_loop/utils/receipts.rs @@ -1,5 +1,6 @@ +use super::loop_action::LoopAction; +use super::retrieve_client_actor; use super::sharding::{next_block_has_new_shard_layout, this_block_has_new_shard_layout}; -use super::{retrieve_client_actor, LoopActionFn}; use crate::test_loop::env::TestData; use crate::test_loop::utils::sharding::get_memtrie_for_shard; use near_async::test_loop::data::TestLoopData; @@ -26,8 +27,9 @@ pub enum ReceiptKind { pub fn check_receipts_presence_at_resharding_block( accounts: Vec, kind: ReceiptKind, -) -> LoopActionFn { - Box::new( +) -> LoopAction { + let (checked_receipts, succeeded) = LoopAction::shared_success_flag(); + let action_fn = Box::new( move |node_datas: &[TestData], test_loop_data: &mut TestLoopData, client_account_id: AccountId| { @@ -42,8 +44,10 @@ pub fn check_receipts_presence_at_resharding_block( accounts.iter().for_each(|account| { check_receipts_at_block(client_actor, &account, &kind, tip.clone()) }); + checked_receipts.set(true); }, - ) + ); + LoopAction::new(action_fn, succeeded) } /// Checks that the shards containing `accounts` have a non empty set of receipts @@ -51,8 +55,9 @@ pub fn check_receipts_presence_at_resharding_block( pub fn check_receipts_presence_after_resharding_block( accounts: Vec, kind: ReceiptKind, -) -> LoopActionFn { - Box::new( +) -> LoopAction { + let (checked_receipts, succeeded) = LoopAction::shared_success_flag(); + let action_fn = Box::new( move |node_datas: &[TestData], test_loop_data: &mut TestLoopData, client_account_id: AccountId| { @@ -67,8 +72,10 @@ pub fn check_receipts_presence_after_resharding_block( accounts.iter().for_each(|account| { check_receipts_at_block(client_actor, &account, &kind, tip.clone()) }); + checked_receipts.set(true); }, - ) + ); + LoopAction::new(action_fn, succeeded) } /// Asserts the presence of any receipt of type `kind` at the provided chain `tip`. From 69ae7228d7401a23d4df4c24ca609a76cce71583 Mon Sep 17 00:00:00 2001 From: Marco Date: Thu, 2 Jan 2025 15:51:13 +0200 Subject: [PATCH 12/15] Fix typographical errors (#12676) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### **Description** This pull request addresses several typographical errors across multiple files in the repository. Specifically, it corrects the usage of indefinite articles ("a" vs. "an") and other minor grammatical issues for improved readability and accuracy. Below are the key changes: 1. **File:** `chain/chain/src/orphan.rs` - Fixed: "a orphan block" → "an orphan block" 2. **File:** `core/async/README.md` - Fixed: "a event-loop-based" → "an event-loop-based" 3. **File:** `core/primitives/src/stateless_validation/stored_chunk_state_transition_data.rs` - Fixed: "a implicit missing chunk" → "an implicit missing chunk" 4. **File:** `runtime/near-vm-runner/src/instrument/stack_height/thunk.rs` - Fixed: "a index of a thunk" → "an index of a thunk" 5. **File:** `runtime/near-vm/test-api/src/sys/externals/global.rs` - Fixed: "a immutable global" → "an immutable global" 6. **File:** `runtime/runtime-params-estimator/src/trie.rs` - Fixed: "a implementation" → "an implementation" 7. 
**File:** `tools/congestion-model/src/main.rs` - Fixed: "an half hour ago" → "a half hour ago" --- chain/chain/src/orphan.rs | 2 +- core/async/README.md | 4 ++-- .../stored_chunk_state_transition_data.rs | 2 +- runtime/near-vm-runner/src/instrument/stack_height/thunk.rs | 2 +- runtime/near-vm/test-api/src/sys/externals/global.rs | 2 +- runtime/runtime-params-estimator/src/trie.rs | 2 +- tools/congestion-model/src/main.rs | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/chain/chain/src/orphan.rs b/chain/chain/src/orphan.rs index dec76709dd8..5f56ec7d349 100644 --- a/chain/chain/src/orphan.rs +++ b/chain/chain/src/orphan.rs @@ -66,7 +66,7 @@ impl Orphan { /// 2) size of the pool exceeds MAX_ORPHAN_SIZE and the orphan was added a long time ago /// or the height is high pub struct OrphanBlockPool { - /// A map from block hash to a orphan block + /// A map from block hash to an orphan block orphans: HashMap, /// A set that contains all orphans for which we have requested missing chunks for them /// An orphan can be added to this set when it was first added to the pool, or later diff --git a/core/async/README.md b/core/async/README.md index 7efa502a5fb..fc5a0776808 100644 --- a/core/async/README.md +++ b/core/async/README.md @@ -4,7 +4,7 @@ This crate contains helpers related to common asynchronous programming patterns used in nearcore: * messaging: common interfaces for sending messages between components. -* test_loop: a event-loop-based test framework that can test multiple components +* test_loop: an event-loop-based test framework that can test multiple components together in a synchronous way. @@ -52,4 +52,4 @@ In tests, the `TestLoopBuilder` provides the `sender()` function which also implements `CanSend`, see the examples directory under this crate. `AsyncSender` is similar, except that calling `send_async` returns a future -that carries the response to the message. \ No newline at end of file +that carries the response to the message. diff --git a/core/primitives/src/stateless_validation/stored_chunk_state_transition_data.rs b/core/primitives/src/stateless_validation/stored_chunk_state_transition_data.rs index ba087a645db..b35a5d86d67 100644 --- a/core/primitives/src/stateless_validation/stored_chunk_state_transition_data.rs +++ b/core/primitives/src/stateless_validation/stored_chunk_state_transition_data.rs @@ -23,7 +23,7 @@ impl StoredChunkStateTransitionData { #[derive(Debug, BorshSerialize, BorshDeserialize, ProtocolSchema)] pub struct StoredChunkStateTransitionDataV1 { /// The partial state that is needed to apply the state transition, - /// whether it is a new chunk state transition or a implicit missing chunk + /// whether it is a new chunk state transition or an implicit missing chunk /// state transition. pub base_state: PartialState, /// If this is a new chunk state transition, the hash of the receipts that diff --git a/runtime/near-vm-runner/src/instrument/stack_height/thunk.rs b/runtime/near-vm-runner/src/instrument/stack_height/thunk.rs index a0d56c20083..65b06ae67fe 100644 --- a/runtime/near-vm-runner/src/instrument/stack_height/thunk.rs +++ b/runtime/near-vm-runner/src/instrument/stack_height/thunk.rs @@ -105,7 +105,7 @@ pub(crate) fn generate_thunks( // And finally, fixup thunks in export and table sections. - // Fixup original function index to a index of a thunk generated earlier. + // Fixup original function index to an index of a thunk generated earlier. 
let fixup = |function_idx: &mut u32| { // Check whether this function is in replacement_map, since // we can skip thunk generation (e.g. if stack_cost of function is 0). diff --git a/runtime/near-vm/test-api/src/sys/externals/global.rs b/runtime/near-vm/test-api/src/sys/externals/global.rs index a2ab1b9c4c3..b9fb2dd08af 100644 --- a/runtime/near-vm/test-api/src/sys/externals/global.rs +++ b/runtime/near-vm/test-api/src/sys/externals/global.rs @@ -104,7 +104,7 @@ impl Global { /// /// # Errors /// - /// Trying to mutate a immutable global will raise an error: + /// Trying to mutate an immutable global will raise an error: /// /// ```should_panic /// # use near_vm_test_api::{Global, Store, Value}; diff --git a/runtime/runtime-params-estimator/src/trie.rs b/runtime/runtime-params-estimator/src/trie.rs index 95ea3361415..e2feb98b2a1 100644 --- a/runtime/runtime-params-estimator/src/trie.rs +++ b/runtime/runtime-params-estimator/src/trie.rs @@ -58,7 +58,7 @@ pub(crate) fn write_node( ); let nodes_touched_delta = ext_cost_long_key[&ExtCosts::touching_trie_node] - ext_cost_short_key[&ExtCosts::touching_trie_node]; - // The exact number of touched nodes is a implementation that we don't want + // The exact number of touched nodes is an implementation that we don't want // to test here but it should be close to 2*final_key_len assert!(nodes_touched_delta as usize <= 2 * final_key_len + 10); assert!(nodes_touched_delta as usize >= 2 * final_key_len - 10); diff --git a/tools/congestion-model/src/main.rs b/tools/congestion-model/src/main.rs index e4d6485d36c..1b65e795940 100644 --- a/tools/congestion-model/src/main.rs +++ b/tools/congestion-model/src/main.rs @@ -130,7 +130,7 @@ fn run_model( let mut model = Model::new(strategy, workload); let mut max_queues = ShardQueueLengths::default(); - // Set the start time to an half hour ago to make it visible by default in + // Set the start time to a half hour ago to make it visible by default in // grafana. Each round is 1 virtual second so hald an hour is good for // looking at a maximum of 1800 rounds, beyond that you'll need to customize // the grafana time range. From 23e6b81a5794f878f08a9d9ca206ed759e5e052b Mon Sep 17 00:00:00 2001 From: Akhilesh Singhania Date: Thu, 2 Jan 2025 16:15:58 +0100 Subject: [PATCH 13/15] `generate_state_witness_parts` cannot fail (#12645) Does not return a `Result` from `generate_state_witness_parts()` as it cannot fail and some knock on removal of unnecessary `Result`s. 
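A hedged sketch of the knock-on effect described above (the types and function bodies below are simplified stand-ins, not the actual nearcore definitions):

```rust
// Before: an infallible helper wrapped its output in Result, so every caller
// had to propagate an error that could never occur.
fn generate_parts_old(n: usize) -> Result<Vec<usize>, String> {
    Ok((0..n).collect())
}

// After: the same logic returns the value directly; callers drop their `?`,
// and some of them stop returning Result themselves.
fn generate_parts(n: usize) -> Vec<usize> {
    (0..n).collect()
}

fn main() {
    assert_eq!(generate_parts(3), generate_parts_old(3).unwrap());
}
```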
--- .../partial_witness/partial_witness_actor.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs index 34f9d0278f6..191607df2fc 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs @@ -239,12 +239,11 @@ impl PartialWitnessActor { witness_bytes, &chunk_validators, &signer, - )?; + ); if !contract_deploys.is_empty() { self.send_chunk_contract_deploys_parts(key, contract_deploys)?; } - Ok(()) } @@ -256,7 +255,7 @@ impl PartialWitnessActor { witness_bytes: EncodedChunkStateWitness, chunk_validators: &[AccountId], signer: &ValidatorSigner, - ) -> Result, Error> { + ) -> Vec<(AccountId, PartialEncodedStateWitness)> { tracing::debug!( target: "client", chunk_hash=?chunk_header.chunk_hash(), @@ -268,7 +267,7 @@ impl PartialWitnessActor { let encoder = self.witness_encoders.entry(chunk_validators.len()); let (parts, encoded_length) = encoder.encode(&witness_bytes); - Ok(chunk_validators + chunk_validators .iter() .zip_eq(parts) .enumerate() @@ -285,7 +284,7 @@ impl PartialWitnessActor { ); (chunk_validator.clone(), partial_witness) }) - .collect_vec()) + .collect_vec() } fn generate_contract_deploys_parts( @@ -333,7 +332,7 @@ impl PartialWitnessActor { witness_bytes: EncodedChunkStateWitness, chunk_validators: &[AccountId], signer: &ValidatorSigner, - ) -> Result<(), Error> { + ) { // Capture these values first, as the sources are consumed before calling record_witness_sent. let chunk_hash = chunk_header.chunk_hash(); let witness_size_in_bytes = witness_bytes.size_bytes(); @@ -349,7 +348,7 @@ impl PartialWitnessActor { witness_bytes, chunk_validators, signer, - )?; + ); encode_timer.observe_duration(); // Record the witness in order to match the incoming acks for measuring round-trip times. @@ -364,7 +363,6 @@ impl PartialWitnessActor { self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( NetworkRequests::PartialEncodedStateWitness(validator_witness_tuple), )); - Ok(()) } /// Sends the witness part to the chunk validators, except the chunk producer that generated the witness part. From 5749548dfde6d3ac4bceafb1f6769705f28333c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Chuda=C5=9B?= <18039094+staffik@users.noreply.github.com> Date: Thu, 2 Jan 2025 21:56:17 +0100 Subject: [PATCH 14/15] fix(resharding): MissingTrieValue in tests (#12677) Partially applies the patch: https://github.com/near/nearcore/pull/12597 It was failing for delayed receipt tests with `MissingTrieValue(TrieStorage)` inside `check_state_shard_uid_mapping_after_resharding`. The reason is that trie access interface returns `None` for a key with an empty value (which is the case for delayed receipts and negative refcounts). That was not caught up before the patch, because the `check_state_shard_uid_mapping_after_resharding` was called after resharding, but not in the end of the test (after gc kicked in). **Changes** This PR modifies the `check_state_shard_uid_mapping_after_resharding` to not check entries with negative refcount. The test is refactored so that the main loop does not finish an epoch after resharding, but keeps running until gc kicks in. For that, the code responsible for testing temporary account is moved to a loop action, and all loop actions are moved to a separate file for readability. 
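A minimal sketch of the storage detail this relies on; the byte layout and names below are assumptions for illustration, not the real near-store API. Refcounted column values carry a signed refcount, and an entry whose count has dropped to zero or below decodes to "no value", which is why the updated check simply skips such entries:

```rust
// Illustrative refcounted-value decoding: payload bytes followed by a signed
// 8-byte little-endian refcount; a non-positive count means "no visible value".
fn decode_value_with_rc(raw: &[u8]) -> (Option<&[u8]>, i64) {
    let (payload, rc_bytes) = raw.split_at(raw.len() - 8);
    let rc = i64::from_le_bytes(rc_bytes.try_into().unwrap());
    if rc > 0 { (Some(payload), rc) } else { (None, rc) }
}

// The sanity check only inspects entries that still decode to a value.
fn should_check_entry(raw: &[u8]) -> bool {
    decode_value_with_rc(raw).0.is_some()
}

fn main() {
    let mut live = b"delayed-receipt".to_vec();
    live.extend_from_slice(&1i64.to_le_bytes());
    let mut gone = b"delayed-receipt".to_vec();
    gone.extend_from_slice(&(-1i64).to_le_bytes());
    assert!(should_check_entry(&live));
    assert!(!should_check_entry(&gone));
}
```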
**Note** `test_resharding_v3_yield_resume` is still failing with the original patch: https://github.com/near/nearcore/pull/12597 --- .../src/test_loop/tests/congestion_control.rs | 4 +- .../contract_distribution_cross_shard.rs | 4 +- .../test_loop/tests/create_delete_account.rs | 2 +- .../src/test_loop/tests/max_receipt_size.rs | 10 +- .../src/test_loop/tests/resharding_v3.rs | 557 ++++-------------- integration-tests/src/test_loop/utils/mod.rs | 1 + .../src/test_loop/utils/resharding.rs | 483 +++++++++++++++ .../src/test_loop/utils/transactions.rs | 37 +- .../src/test_loop/utils/trie_sanity.rs | 19 +- 9 files changed, 638 insertions(+), 479 deletions(-) create mode 100644 integration-tests/src/test_loop/utils/resharding.rs diff --git a/integration-tests/src/test_loop/tests/congestion_control.rs b/integration-tests/src/test_loop/tests/congestion_control.rs index 57c11454ab3..1fd3cbbb757 100644 --- a/integration-tests/src/test_loop/tests/congestion_control.rs +++ b/integration-tests/src/test_loop/tests/congestion_control.rs @@ -114,7 +114,7 @@ fn do_deploy_contract( let code = near_test_contracts::rs_contract().to_vec(); let tx = deploy_contract(test_loop, node_datas, rpc_id, contract_id, code, 1); test_loop.run_for(Duration::seconds(5)); - check_txs(&*test_loop, node_datas, rpc_id, &[tx]); + check_txs(&test_loop.data, node_datas, rpc_id, &[tx]); } /// Call the contract from all accounts and wait until the transactions are executed. @@ -144,7 +144,7 @@ fn do_call_contract( txs.push(tx); } test_loop.run_for(Duration::seconds(20)); - check_txs(&*test_loop, node_datas, &rpc_id, &txs); + check_txs(&test_loop.data, node_datas, &rpc_id, &txs); } /// The condition that can be used for the test loop to wait until the chain diff --git a/integration-tests/src/test_loop/tests/contract_distribution_cross_shard.rs b/integration-tests/src/test_loop/tests/contract_distribution_cross_shard.rs index ae37fabd066..af89ff790f8 100644 --- a/integration-tests/src/test_loop/tests/contract_distribution_cross_shard.rs +++ b/integration-tests/src/test_loop/tests/contract_distribution_cross_shard.rs @@ -143,7 +143,7 @@ fn deploy_contracts( contracts.push(contract); } env.test_loop.run_for(Duration::seconds(2)); - check_txs(&env.test_loop, &env.datas, rpc_id, &txs); + check_txs(&env.test_loop.data, &env.datas, rpc_id, &txs); contracts } @@ -175,5 +175,5 @@ fn call_contracts( } } env.test_loop.run_for(Duration::seconds(2)); - check_txs(&env.test_loop, &env.datas, &rpc_id, &txs); + check_txs(&env.test_loop.data, &env.datas, &rpc_id, &txs); } diff --git a/integration-tests/src/test_loop/tests/create_delete_account.rs b/integration-tests/src/test_loop/tests/create_delete_account.rs index 66a87355e82..22681d07ed5 100644 --- a/integration-tests/src/test_loop/tests/create_delete_account.rs +++ b/integration-tests/src/test_loop/tests/create_delete_account.rs @@ -33,7 +33,7 @@ fn do_call_contract(env: &mut TestLoopEnv, rpc_id: &AccountId, contract_id: &Acc nonce, ); env.test_loop.run_for(Duration::seconds(5)); - check_txs(&env.test_loop, &env.datas, rpc_id, &[tx]); + check_txs(&env.test_loop.data, &env.datas, rpc_id, &[tx]); } /// Tracks latest block heights and checks that all chunks are produced. 
diff --git a/integration-tests/src/test_loop/tests/max_receipt_size.rs b/integration-tests/src/test_loop/tests/max_receipt_size.rs index 9d386a608bd..7d11ae4ceab 100644 --- a/integration-tests/src/test_loop/tests/max_receipt_size.rs +++ b/integration-tests/src/test_loop/tests/max_receipt_size.rs @@ -31,7 +31,7 @@ fn slow_test_max_receipt_size() { &account0, vec![0u8; 2_000_000], account0_signer, - get_shared_block_hash(&env.datas, &env.test_loop), + get_shared_block_hash(&env.datas, &env.test_loop.data), ); let large_tx_exec_res = execute_tx(&mut env.test_loop, &rpc_id, large_tx, &env.datas, Duration::seconds(5)); @@ -43,7 +43,7 @@ fn slow_test_max_receipt_size() { &account0, near_test_contracts::rs_contract().into(), &account0_signer, - get_shared_block_hash(&env.datas, &env.test_loop), + get_shared_block_hash(&env.datas, &env.test_loop.data), ); run_tx(&mut env.test_loop, &rpc_id, deploy_contract_tx, &env.datas, Duration::seconds(5)); @@ -59,7 +59,7 @@ fn slow_test_max_receipt_size() { "generate_large_receipt".into(), r#"{"account_id": "account0", "method_name": "noop", "total_args_size": 3000000}"#.into(), 300 * TGAS, - get_shared_block_hash(&env.datas, &env.test_loop), + get_shared_block_hash(&env.datas, &env.test_loop.data), ); run_tx(&mut env.test_loop, &rpc_id, large_receipt_tx, &env.datas, Duration::seconds(5)); @@ -73,7 +73,7 @@ fn slow_test_max_receipt_size() { "generate_large_receipt".into(), r#"{"account_id": "account0", "method_name": "noop", "total_args_size": 5000000}"#.into(), 300 * TGAS, - get_shared_block_hash(&env.datas, &env.test_loop), + get_shared_block_hash(&env.datas, &env.test_loop.data), ); let too_large_receipt_tx_exec_res = execute_tx( &mut env.test_loop, @@ -115,7 +115,7 @@ fn slow_test_max_receipt_size() { "sum_n".into(), 5_u64.to_le_bytes().to_vec(), 300 * TGAS, - get_shared_block_hash(&env.datas, &env.test_loop), + get_shared_block_hash(&env.datas, &env.test_loop.data), ); let sum_4_res = run_tx(&mut env.test_loop, &rpc_id, sum_4_tx, &env.datas, Duration::seconds(5)); assert_eq!(sum_4_res, 10u64.to_le_bytes().to_vec()); diff --git a/integration-tests/src/test_loop/tests/resharding_v3.rs b/integration-tests/src/test_loop/tests/resharding_v3.rs index 4c0864056f0..f6064417095 100644 --- a/integration-tests/src/test_loop/tests/resharding_v3.rs +++ b/integration-tests/src/test_loop/tests/resharding_v3.rs @@ -1,48 +1,38 @@ +use assert_matches::assert_matches; use itertools::Itertools; use near_async::test_loop::data::TestLoopData; use near_async::time::Duration; use near_chain_configs::test_genesis::{TestGenesisBuilder, ValidatorsSpec}; -use near_chain_configs::DEFAULT_GC_NUM_EPOCHS_TO_KEEP; -use near_client::Query; use near_o11y::testonly::init_test_logger; use near_primitives::epoch_manager::EpochConfigStore; use near_primitives::shard_layout::ShardLayout; -use near_primitives::types::{ - AccountId, BlockHeightDelta, BlockId, BlockReference, Gas, ShardId, ShardIndex, -}; +use near_primitives::types::{AccountId, BlockHeightDelta, ShardId, ShardIndex}; use near_primitives::version::{ProtocolFeature, PROTOCOL_VERSION}; -use rand::seq::SliceRandom; -use rand::Rng; -use rand::SeedableRng; -use rand_chacha::ChaCha20Rng; use std::cell::Cell; use std::collections::{BTreeMap, HashMap}; use std::sync::Arc; use crate::test_loop::builder::TestLoopBuilder; -use crate::test_loop::env::{TestData, TestLoopEnv}; use crate::test_loop::utils::loop_action::{LoopAction, LoopActionStatus}; use crate::test_loop::utils::receipts::{ check_receipts_presence_after_resharding_block, 
check_receipts_presence_at_resharding_block, ReceiptKind, }; -use crate::test_loop::utils::sharding::{ - next_block_has_new_shard_layout, print_and_assert_shard_accounts, +#[cfg(feature = "test_features")] +use crate::test_loop::utils::resharding::fork_before_resharding_block; +use crate::test_loop::utils::resharding::{ + call_burn_gas_contract, call_promise_yield, execute_money_transfers, + temporary_account_during_resharding, }; +use crate::test_loop::utils::sharding::print_and_assert_shard_accounts; use crate::test_loop::utils::transactions::{ - check_txs, create_account, delete_account, deploy_contract, get_anchor_hash, get_next_nonce, - get_smallest_height_head, store_and_submit_tx, submit_tx, + check_txs, create_account, deploy_contract, get_smallest_height_head, }; use crate::test_loop::utils::trie_sanity::{ check_state_shard_uid_mapping_after_resharding, TrieSanityCheck, }; -use crate::test_loop::utils::{get_node_data, retrieve_client_actor, ONE_NEAR, TGAS}; -use assert_matches::assert_matches; -use near_crypto::Signer; +use crate::test_loop::utils::{ONE_NEAR, TGAS}; use near_parameters::{vm, RuntimeConfig, RuntimeConfigStore}; -use near_primitives::test_utils::create_user_test_signer; -use near_primitives::transaction::SignedTransaction; -use near_primitives::views::{FinalExecutionStatus, QueryRequest}; /// Default and minimal epoch length used in resharding tests. const DEFAULT_EPOCH_LENGTH: u64 = 6; @@ -82,10 +72,13 @@ struct TestReshardingParameters { validators: Vec, #[builder(setter(skip))] rpcs: Vec, + // Index of the client used to serve requests (RPC node if available, otherwise first from `clients`) #[builder(setter(skip))] - rpc_client_index: Option, + client_index: usize, #[builder(setter(skip))] archivals: Vec, + #[builder(setter(skip))] + new_boundary_account: AccountId, initial_balance: u128, epoch_length: BlockHeightDelta, chunk_ranges_to_drop: HashMap>, @@ -113,6 +106,11 @@ struct TestReshardingParameters { // TODO(resharding) Remove this when negative refcounts are properly handled. /// Whether to allow negative refcount being a result of the database update. allow_negative_refcount: bool, + /// If not disabled, use testloop action that will delete an account after resharding + /// and check that the account is accessible through archival node but not through a regular node. 
+ disable_temporary_account_test: bool, + #[builder(setter(skip))] + temporary_account_id: AccountId, } impl TestReshardingParametersBuilder { @@ -159,17 +157,34 @@ impl TestReshardingParametersBuilder { let validators = validators.to_vec(); let (rpcs, tmp) = tmp.split_at(num_rpcs as usize); let rpcs = rpcs.to_vec(); - let rpc_client_index = - rpcs.first().map(|_| num_producers as usize + num_validators as usize); let (archivals, _) = tmp.split_at(num_archivals as usize); let archivals = archivals.to_vec(); + let client_index = + if rpcs.is_empty() { 0 } else { num_producers + num_validators } as usize; + let client_id = clients[client_index].clone(); + println!("Clients setup:"); println!("Producers: {producers:?}"); println!("Validators: {validators:?}"); - println!("Rpcs: {rpcs:?}, first RPC node uses client at index: {rpc_client_index:?}"); + println!("Rpcs: {rpcs:?}, to serve requests we use client: {client_id}"); println!("Archivals: {archivals:?}"); + let new_boundary_account: AccountId = "account6".parse().unwrap(); + let temporary_account_id: AccountId = + format!("{}.{}", new_boundary_account, new_boundary_account).parse().unwrap(); + let mut loop_actions = self.loop_actions.unwrap_or_default(); + let disable_temporary_account_test = self.disable_temporary_account_test.unwrap_or(false); + if !disable_temporary_account_test { + let archival_id = archivals.iter().next().cloned(); + loop_actions.push(temporary_account_during_resharding( + archival_id, + client_id, + new_boundary_account.clone(), + temporary_account_id.clone(), + )); + } + TestReshardingParameters { base_shard_layout_version: self.base_shard_layout_version.unwrap_or(2), num_accounts, @@ -183,8 +198,9 @@ impl TestReshardingParametersBuilder { producers, validators, rpcs, - rpc_client_index, + client_index, archivals, + new_boundary_account, initial_balance: self.initial_balance.unwrap_or(1_000_000 * ONE_NEAR), epoch_length, chunk_ranges_to_drop: self.chunk_ranges_to_drop.unwrap_or_default(), @@ -195,13 +211,15 @@ impl TestReshardingParametersBuilder { load_mem_tries_for_tracked_shards: self .load_mem_tries_for_tracked_shards .unwrap_or(true), - loop_actions: self.loop_actions.unwrap_or_default(), + loop_actions, all_chunks_expected: self.all_chunks_expected.unwrap_or(false), deploy_test_contract: self.deploy_test_contract.unwrap_or_default(), limit_outgoing_gas: self.limit_outgoing_gas.unwrap_or(false), delay_flat_state_resharding: self.delay_flat_state_resharding.unwrap_or(0), short_yield_timeout: self.short_yield_timeout.unwrap_or(false), allow_negative_refcount: self.allow_negative_refcount.unwrap_or(false), + disable_temporary_account_test, + temporary_account_id, } } @@ -222,334 +240,6 @@ impl TestReshardingParametersBuilder { } } -// Returns a callable function that, when invoked inside a test loop iteration, can force the creation of a chain fork. -#[cfg(feature = "test_features")] -fn fork_before_resharding_block(double_signing: bool) -> LoopAction { - use crate::test_loop::utils::retrieve_client_actor; - use near_client::client_actor::AdvProduceBlockHeightSelection; - - let (done, succeeded) = LoopAction::shared_success_flag(); - let action_fn = Box::new( - move |node_datas: &[TestData], - test_loop_data: &mut TestLoopData, - client_account_id: AccountId| { - // It must happen only for the first resharding block encountered. 
- if done.get() { - return; - } - let client_actor = - retrieve_client_actor(node_datas, test_loop_data, &client_account_id); - let tip = client_actor.client.chain.head().unwrap(); - - // If there's a new shard layout force a chain fork. - if next_block_has_new_shard_layout(client_actor.client.epoch_manager.as_ref(), &tip) { - println!("creating chain fork at height {}", tip.height); - let height_selection = if double_signing { - // In the double signing scenario we want a new block on top of prev block, with consecutive height. - AdvProduceBlockHeightSelection::NextHeightOnSelectedBlock { - base_block_height: tip.height - 1, - } - } else { - // To avoid double signing skip already produced height. - AdvProduceBlockHeightSelection::SelectedHeightOnSelectedBlock { - produced_block_height: tip.height + 1, - base_block_height: tip.height - 1, - } - }; - client_actor.adv_produce_blocks_on(3, true, height_selection); - done.set(true); - } - }, - ); - LoopAction::new(action_fn, succeeded) -} - -fn execute_money_transfers(account_ids: Vec) -> LoopAction { - const NUM_TRANSFERS_PER_BLOCK: usize = 20; - - let latest_height = Cell::new(0); - let seed = rand::thread_rng().gen::(); - println!("Random seed: {}", seed); - - let (ran_transfers, succeeded) = LoopAction::shared_success_flag(); - let action_fn = Box::new( - move |node_datas: &[TestData], - test_loop_data: &mut TestLoopData, - client_account_id: AccountId| { - let client_actor = - retrieve_client_actor(node_datas, test_loop_data, &client_account_id); - let tip = client_actor.client.chain.head().unwrap(); - - // Run this action only once at every block height. - if latest_height.get() == tip.height { - return; - } - latest_height.set(tip.height); - - let mut slice = [0u8; 32]; - slice[0..8].copy_from_slice(&seed.to_le_bytes()); - slice[8..16].copy_from_slice(&tip.height.to_le_bytes()); - let mut rng: ChaCha20Rng = SeedableRng::from_seed(slice); - - for _ in 0..NUM_TRANSFERS_PER_BLOCK { - let sender = account_ids.choose(&mut rng).unwrap().clone(); - let receiver = account_ids.choose(&mut rng).unwrap().clone(); - - let clients = node_datas - .iter() - .map(|test_data| { - &test_loop_data.get(&test_data.client_sender.actor_handle()).client - }) - .collect_vec(); - - let anchor_hash = get_anchor_hash(&clients); - let nonce = get_next_nonce(&test_loop_data, &node_datas, &sender); - let amount = ONE_NEAR * rng.gen_range(1..=10); - let tx = SignedTransaction::send_money( - nonce, - sender.clone(), - receiver.clone(), - &create_user_test_signer(&sender).into(), - amount, - anchor_hash, - ); - submit_tx(&node_datas, &client_account_id, tx); - } - ran_transfers.set(true); - }, - ); - LoopAction::new(action_fn, succeeded) -} - -/// Returns a loop action that invokes a costly method from a contract -/// `CALLS_PER_BLOCK_HEIGHT` times per block height. -/// -/// The account invoking the contract is taken in sequential order from `signed_ids`. -/// -/// The account receiving the contract call is taken in sequential order from `receiver_ids`. -fn call_burn_gas_contract( - signer_ids: Vec, - receiver_ids: Vec, - gas_burnt_per_call: Gas, - epoch_length: u64, -) -> LoopAction { - const CALLS_PER_BLOCK_HEIGHT: usize = 5; - // Set to a value large enough, so that transactions from the past epoch are settled. - // Must be less than epoch length, otherwise won't be triggered before the test is finished. 
- let tx_check_blocks_after_resharding = epoch_length - 2; - - let resharding_height = Cell::new(None); - let nonce = Cell::new(102); - let txs = Cell::new(vec![]); - let latest_height = Cell::new(0); - let (checked_transactions, succeeded) = LoopAction::shared_success_flag(); - - let action_fn = Box::new( - move |node_datas: &[TestData], - test_loop_data: &mut TestLoopData, - client_account_id: AccountId| { - let client_actor = - retrieve_client_actor(node_datas, test_loop_data, &client_account_id); - let tip = client_actor.client.chain.head().unwrap(); - - // Run this action only once at every block height. - if latest_height.get() == tip.height { - return; - } - latest_height.set(tip.height); - - // After resharding: wait some blocks and check that all txs have been executed correctly. - if let Some(height) = resharding_height.get() { - if tip.height > height + tx_check_blocks_after_resharding { - for (tx, tx_height) in txs.take() { - let tx_outcome = - client_actor.client.chain.get_partial_transaction_result(&tx); - let status = tx_outcome.as_ref().map(|o| o.status.clone()); - let status = status.unwrap(); - tracing::debug!(target: "test", ?tx_height, ?tx, ?status, "transaction status"); - assert_matches!(status, FinalExecutionStatus::SuccessValue(_)); - } - checked_transactions.set(true); - } - } else { - if next_block_has_new_shard_layout(client_actor.client.epoch_manager.as_ref(), &tip) - { - tracing::debug!(target: "test", height=tip.height, "resharding height set"); - resharding_height.set(Some(tip.height)); - } - } - // Before resharding and one block after: call the test contract a few times per block. - // The objective is to pile up receipts (e.g. delayed). - if tip.height <= resharding_height.get().unwrap_or(1000) + 1 { - for i in 0..CALLS_PER_BLOCK_HEIGHT { - // Note that if the number of signers and receivers is the - // same then the traffic will always flow the same way. It - // would be nice to randomize it a bit. - let signer_id = &signer_ids[i % signer_ids.len()]; - let receiver_id = &receiver_ids[i % receiver_ids.len()]; - let signer: Signer = create_user_test_signer(signer_id).into(); - nonce.set(nonce.get() + 1); - let method_name = "burn_gas_raw".to_owned(); - let burn_gas: u64 = gas_burnt_per_call; - let args = burn_gas.to_le_bytes().to_vec(); - let tx = SignedTransaction::call( - nonce.get(), - signer_id.clone(), - receiver_id.clone(), - &signer, - 1, - method_name, - args, - gas_burnt_per_call + 10 * TGAS, - tip.last_block_hash, - ); - store_and_submit_tx( - &node_datas, - &client_account_id, - &txs, - &signer_id, - &receiver_id, - tip.height, - tx, - ); - } - } - }, - ); - LoopAction::new(action_fn, succeeded) -} - -/// Sends a promise-yield transaction before resharding. Then, if `call_resume` is `true` also sends -/// a yield-resume transaction after resharding, otherwise it lets the promise-yield go into timeout. -/// -/// Each `signer_id` sends transaction to the corresponding `receiver_id`. -/// -/// A few blocks after resharding all transactions outcomes are checked for successful execution. 
-fn call_promise_yield( - call_resume: bool, - signer_ids: Vec, - receiver_ids: Vec, -) -> LoopAction { - let resharding_height: Cell> = Cell::new(None); - let txs = Cell::new(vec![]); - let latest_height = Cell::new(0); - let promise_txs_sent = Cell::new(false); - let nonce = Cell::new(102); - let yield_payload = vec![]; - let (checked_transactions, succeeded) = LoopAction::shared_success_flag(); - - let action_fn = Box::new( - move |node_datas: &[TestData], - test_loop_data: &mut TestLoopData, - client_account_id: AccountId| { - let client_actor = - retrieve_client_actor(node_datas, test_loop_data, &client_account_id); - let tip = client_actor.client.chain.head().unwrap(); - - // Run this action only once at every block height. - if latest_height.get() == tip.height { - return; - } - latest_height.set(tip.height); - - // The operation to be done depends on the current block height in relation to the - // resharding height. - match (resharding_height.get(), latest_height.get()) { - // Resharding happened in the previous block. - // Maybe send the resume transaction. - (Some(resharding), latest) if latest == resharding + 1 && call_resume => { - for (signer_id, receiver_id) in - signer_ids.clone().into_iter().zip(receiver_ids.clone().into_iter()) - { - let signer: Signer = create_user_test_signer(&signer_id).into(); - nonce.set(nonce.get() + 1); - let tx = SignedTransaction::call( - nonce.get(), - signer_id.clone(), - receiver_id.clone(), - &signer, - 1, - "call_yield_resume_read_data_id_from_storage".to_string(), - yield_payload.clone(), - 300 * TGAS, - tip.last_block_hash, - ); - store_and_submit_tx( - &node_datas, - &client_account_id, - &txs, - &signer_id, - &receiver_id, - tip.height, - tx, - ); - } - } - // Resharding happened a few blocks in the past. - // Check transactions' outcomes. - (Some(resharding), latest) if latest == resharding + 4 => { - let txs = txs.take(); - assert_ne!(txs.len(), 0); - for (tx, tx_height) in txs { - let tx_outcome = - client_actor.client.chain.get_partial_transaction_result(&tx); - let status = tx_outcome.as_ref().map(|o| o.status.clone()); - let status = status.unwrap(); - tracing::debug!(target: "test", ?tx_height, ?tx, ?status, "transaction status"); - assert_matches!(status, FinalExecutionStatus::SuccessValue(_)); - } - checked_transactions.set(true); - } - (Some(_resharding), _latest) => {} - // Resharding didn't happen in the past. - (None, _) => { - // Check if resharding will happen in this block. - if next_block_has_new_shard_layout( - client_actor.client.epoch_manager.as_ref(), - &tip, - ) { - tracing::debug!(target: "test", height=tip.height, "resharding height set"); - resharding_height.set(Some(tip.height)); - return; - } - // Before resharding, send a set of promise transactions, just once. 
- if promise_txs_sent.get() { - return; - } - for (signer_id, receiver_id) in - signer_ids.clone().into_iter().zip(receiver_ids.clone().into_iter()) - { - let signer: Signer = create_user_test_signer(&signer_id).into(); - nonce.set(nonce.get() + 1); - let tx = SignedTransaction::call( - nonce.get(), - signer_id.clone(), - receiver_id.clone(), - &signer, - 0, - "call_yield_create_return_promise".to_string(), - yield_payload.clone(), - 300 * TGAS, - tip.last_block_hash, - ); - store_and_submit_tx( - &node_datas, - &client_account_id, - &txs, - &signer_id, - &receiver_id, - tip.height, - tx, - ); - } - promise_txs_sent.set(true); - } - } - }, - ); - LoopAction::new(action_fn, succeeded) -} - fn get_base_shard_layout(version: u64) -> ShardLayout { let boundary_accounts = vec!["account1".parse().unwrap(), "account3".parse().unwrap()]; match version { @@ -568,40 +258,6 @@ fn get_base_shard_layout(version: u64) -> ShardLayout { } } -// After resharding and gc-period, assert the deleted `account_id` -// is still accessible through archival node view client (if available), -// and it is not accessible through a regular, RPC node. -fn check_deleted_account_availability( - env: &mut TestLoopEnv, - archival_id: &Option<&AccountId>, - rpc_id: &AccountId, - account_id: AccountId, - height: u64, -) { - let rpc_node_data = get_node_data(&env.datas, &rpc_id); - let rpc_view_client_handle = rpc_node_data.view_client_sender.actor_handle(); - - let block_reference = BlockReference::BlockId(BlockId::Height(height)); - let request = QueryRequest::ViewAccount { account_id }; - let msg = Query::new(block_reference, request); - - let rpc_node_result = { - let view_client = env.test_loop.data.get_mut(&rpc_view_client_handle); - near_async::messaging::Handler::handle(view_client, msg.clone()) - }; - assert!(!rpc_node_result.is_ok()); - - if let Some(archival_id) = archival_id { - let archival_node_data = get_node_data(&env.datas, &archival_id); - let archival_view_client_handle = archival_node_data.view_client_sender.actor_handle(); - let archival_node_result = { - let view_client = env.test_loop.data.get_mut(&archival_view_client_handle); - near_async::messaging::Handler::handle(view_client, msg) - }; - assert!(archival_node_result.is_ok()); - } -} - /// Base setup to check sanity of Resharding V3. fn test_resharding_v3_base(params: TestReshardingParameters) { if !ProtocolFeature::SimpleNightshadeV4.enabled(PROTOCOL_VERSION) { @@ -637,7 +293,7 @@ fn test_resharding_v3_base(params: TestReshardingParameters) { let base_shard_layout = get_base_shard_layout(params.base_shard_layout_version); base_epoch_config.shard_layout = base_shard_layout.clone(); - let new_boundary_account = "account6".parse().unwrap(); + let new_boundary_account = params.new_boundary_account; let parent_shard_uid = base_shard_layout.account_id_to_shard_uid(&new_boundary_account); let mut epoch_config = base_epoch_config.clone(); epoch_config.shard_layout = @@ -687,10 +343,8 @@ fn test_resharding_v3_base(params: TestReshardingParameters) { builder = builder.runtime_config_store(runtime_config_store); } - let archival_id = params.archivals.iter().next(); - // Try to use an RPC client, if available. Otherwise fallback to the client with the lowest index. 
- let client_index = params.rpc_client_index.unwrap_or(0); - let client_account_id = params.rpcs.get(0).unwrap_or_else(|| ¶ms.clients[0]).clone(); + let client_index = params.client_index; + let client_account_id = params.clients[client_index].clone(); let mut env = builder .genesis(genesis) @@ -716,27 +370,20 @@ fn test_resharding_v3_base(params: TestReshardingParameters) { ); test_setup_transactions.push(deploy_contract_tx); } - - // Create an account that is: - // 1) Subaccount of a future resharding boundary account. - // 2) Temporary, because we will remove it after resharding. - // The goal is to test removing some state and see if it is kept on archival node. - // The secondary goal is to catch potential bugs due to the above two conditions making it a special case. - let temporary_account = - format!("{}.{}", new_boundary_account, new_boundary_account).parse().unwrap(); - let create_account_tx = create_account( - &mut env, - &client_account_id, - &new_boundary_account, - &temporary_account, - 10 * ONE_NEAR, - 2, - ); - test_setup_transactions.push(create_account_tx); - + if !params.disable_temporary_account_test { + let create_account_tx = create_account( + &mut env, + &client_account_id, + &new_boundary_account, + ¶ms.temporary_account_id, + 10 * ONE_NEAR, + 2, + ); + test_setup_transactions.push(create_account_tx); + } // Wait for the test setup transactions to settle and ensure they all succeeded. env.test_loop.run_for(Duration::seconds(2)); - check_txs(&env.test_loop, &env.datas, &client_account_id, &test_setup_transactions); + check_txs(&env.test_loop.data, &env.datas, &client_account_id, &test_setup_transactions); let client_handles = env.datas.iter().map(|data| data.client_sender.actor_handle()).collect_vec(); @@ -756,8 +403,13 @@ fn test_resharding_v3_base(params: TestReshardingParameters) { client_handles.iter().map(|handle| &env.test_loop.data.get(handle).client).collect_vec(); let mut trie_sanity_check = TrieSanityCheck::new(&clients, params.load_mem_tries_for_tracked_shards); + let gc_num_epochs_to_keep = clients[client_index].config.gc.gc_num_epochs_to_keep; - let latest_block_height = std::cell::Cell::new(0u64); + let latest_block_height = Cell::new(0u64); + // Height of a block after resharding. + let new_layout_block_height = Cell::new(None); + // Height of an epoch after resharding. + let new_layout_epoch_height = Cell::new(None); let success_condition = |test_loop_data: &mut TestLoopData| -> bool { params .loop_actions @@ -766,38 +418,58 @@ fn test_resharding_v3_base(params: TestReshardingParameters) { let clients = client_handles.iter().map(|handle| &test_loop_data.get(handle).client).collect_vec(); - let client = clients[client_index]; + // Skip if we already checked the latest height let tip = get_smallest_height_head(&clients); + if latest_block_height.get() == tip.height { + return false; + } + if latest_block_height.get() == 0 { + println!("State before resharding:"); + print_and_assert_shard_accounts(&clients, &tip); + } + latest_block_height.set(tip.height); // Check that all chunks are included. 
+ let client = clients[client_index]; let block_header = client.chain.get_block_header(&tip.last_block_hash).unwrap(); - if latest_block_height.get() < tip.height { - if latest_block_height.get() == 0 { - println!("State before resharding:"); - print_and_assert_shard_accounts(&clients, &tip); - } - trie_sanity_check.assert_state_sanity(&clients, expected_num_shards); - latest_block_height.set(tip.height); - if params.all_chunks_expected && params.chunk_ranges_to_drop.is_empty() { - assert!(block_header.chunk_mask().iter().all(|chunk_bit| *chunk_bit)); - } + if params.all_chunks_expected && params.chunk_ranges_to_drop.is_empty() { + assert!(block_header.chunk_mask().iter().all(|chunk_bit| *chunk_bit)); } - // Return true if we passed an epoch with increased number of shards. + trie_sanity_check.assert_state_sanity(&clients, expected_num_shards); + let epoch_height = client.epoch_manager.get_epoch_height_from_prev_block(&tip.prev_block_hash).unwrap(); - assert!(epoch_height < 6); - let prev_epoch_id = - client.epoch_manager.get_prev_epoch_id_from_prev_block(&tip.prev_block_hash).unwrap(); - let epoch_config = client.epoch_manager.get_epoch_config(&prev_epoch_id).unwrap(); - if epoch_config.shard_layout.num_shards() != expected_num_shards { - return false; + + // Return false if we have not yet passed an epoch with increased number of shards. + if new_layout_epoch_height.get().is_none() { + assert!(epoch_height < 6); + let prev_epoch_id = client + .epoch_manager + .get_prev_epoch_id_from_prev_block(&tip.prev_block_hash) + .unwrap(); + let epoch_config = client.epoch_manager.get_epoch_config(&prev_epoch_id).unwrap(); + if epoch_config.shard_layout.num_shards() != expected_num_shards { + return false; + } + // Just passed an epoch with increased number of shards. + new_layout_block_height.set(Some(latest_block_height.get())); + new_layout_epoch_height.set(Some(epoch_height)); + println!("State after resharding:"); + print_and_assert_shard_accounts(&clients, &tip); } - println!("State after resharding:"); - print_and_assert_shard_accounts(&clients, &tip); - check_state_shard_uid_mapping_after_resharding(&client, parent_shard_uid); + check_state_shard_uid_mapping_after_resharding( + &client, + parent_shard_uid, + params.allow_negative_refcount, + ); + + // Return false if garbage collection window has not passed yet since resharding. + if epoch_height <= new_layout_epoch_height.get().unwrap() + gc_num_epochs_to_keep { + return false; + } for loop_action in ¶ms.loop_actions { assert_matches!(loop_action.get_status(), LoopActionStatus::Succeeded); } @@ -811,21 +483,6 @@ fn test_resharding_v3_base(params: TestReshardingParameters) { ); let client = &env.test_loop.data.get(&client_handles[client_index]).client; trie_sanity_check.check_epochs(client); - let height_after_resharding = latest_block_height.get(); - - // Delete `temporary_account`. - delete_account(&mut env, &client_account_id, &temporary_account, &client_account_id); - // Wait for garbage collection to kick in. - env.test_loop - .run_for(Duration::seconds((DEFAULT_GC_NUM_EPOCHS_TO_KEEP * params.epoch_length) as i64)); - // Check that the deleted account is still accessible at archival node, but not at a regular node. 
- check_deleted_account_availability( - &mut env, - &archival_id, - &client_account_id, - temporary_account, - height_after_resharding, - ); env.shutdown_and_drain_remaining_events(Duration::seconds(20)); } @@ -851,6 +508,7 @@ fn test_resharding_v3_drop_chunks_before() { test_resharding_v3_base( TestReshardingParametersBuilder::default() .chunk_ranges_to_drop(chunk_ranges_to_drop) + .epoch_length(INCREASED_EPOCH_LENGTH) .build(), ); } @@ -871,6 +529,7 @@ fn test_resharding_v3_drop_chunks_before_and_after() { test_resharding_v3_base( TestReshardingParametersBuilder::default() .chunk_ranges_to_drop(chunk_ranges_to_drop) + .epoch_length(INCREASED_EPOCH_LENGTH) .build(), ); } diff --git a/integration-tests/src/test_loop/utils/mod.rs b/integration-tests/src/test_loop/utils/mod.rs index 792899d6a45..fcacfb0e16b 100644 --- a/integration-tests/src/test_loop/utils/mod.rs +++ b/integration-tests/src/test_loop/utils/mod.rs @@ -7,6 +7,7 @@ pub(crate) mod contract_distribution; pub(crate) mod loop_action; pub(crate) mod network; pub(crate) mod receipts; +pub(crate) mod resharding; pub(crate) mod setups; pub(crate) mod sharding; pub(crate) mod transactions; diff --git a/integration-tests/src/test_loop/utils/resharding.rs b/integration-tests/src/test_loop/utils/resharding.rs new file mode 100644 index 00000000000..005fc3f3fad --- /dev/null +++ b/integration-tests/src/test_loop/utils/resharding.rs @@ -0,0 +1,483 @@ +use std::cell::Cell; + +use assert_matches::assert_matches; +use itertools::Itertools; +use near_async::test_loop::data::TestLoopData; +use near_client::{Query, QueryError::GarbageCollectedBlock}; +use near_crypto::Signer; +use near_primitives::test_utils::create_user_test_signer; +use near_primitives::transaction::SignedTransaction; +use near_primitives::types::{AccountId, BlockId, BlockReference, Gas}; +use near_primitives::views::{ + FinalExecutionStatus, QueryRequest, QueryResponse, QueryResponseKind, +}; +use rand::seq::SliceRandom; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha20Rng; + +use super::sharding::this_block_has_new_shard_layout; +use crate::test_loop::env::TestData; +use crate::test_loop::utils::loop_action::LoopAction; +use crate::test_loop::utils::sharding::next_block_has_new_shard_layout; +use crate::test_loop::utils::transactions::{ + check_txs, delete_account, get_anchor_hash, get_next_nonce, store_and_submit_tx, submit_tx, +}; +use crate::test_loop::utils::{get_node_data, retrieve_client_actor, ONE_NEAR, TGAS}; + +// Returns a callable function that, when invoked inside a test loop iteration, can force the creation of a chain fork. +#[cfg(feature = "test_features")] +pub(crate) fn fork_before_resharding_block(double_signing: bool) -> LoopAction { + use near_client::client_actor::AdvProduceBlockHeightSelection; + + let (done, succeeded) = LoopAction::shared_success_flag(); + let action_fn = Box::new( + move |node_datas: &[TestData], + test_loop_data: &mut TestLoopData, + client_account_id: AccountId| { + // It must happen only for the first resharding block encountered. + if done.get() { + return; + } + let client_actor = + retrieve_client_actor(node_datas, test_loop_data, &client_account_id); + let tip = client_actor.client.chain.head().unwrap(); + + // If there's a new shard layout force a chain fork. 
+            if next_block_has_new_shard_layout(client_actor.client.epoch_manager.as_ref(), &tip) {
+                println!("creating chain fork at height {}", tip.height);
+                let height_selection = if double_signing {
+                    // In the double signing scenario we want a new block on top of prev block, with consecutive height.
+                    AdvProduceBlockHeightSelection::NextHeightOnSelectedBlock {
+                        base_block_height: tip.height - 1,
+                    }
+                } else {
+                    // To avoid double signing skip already produced height.
+                    AdvProduceBlockHeightSelection::SelectedHeightOnSelectedBlock {
+                        produced_block_height: tip.height + 1,
+                        base_block_height: tip.height - 1,
+                    }
+                };
+                client_actor.adv_produce_blocks_on(3, true, height_selection);
+                done.set(true);
+            }
+        },
+    );
+    LoopAction::new(action_fn, succeeded)
+}
+
+pub(crate) fn execute_money_transfers(account_ids: Vec<AccountId>) -> LoopAction {
+    const NUM_TRANSFERS_PER_BLOCK: usize = 20;
+
+    let latest_height = Cell::new(0);
+    let seed = rand::thread_rng().gen::<u64>();
+    println!("Random seed: {}", seed);
+
+    let (ran_transfers, succeeded) = LoopAction::shared_success_flag();
+    let action_fn = Box::new(
+        move |node_datas: &[TestData],
+              test_loop_data: &mut TestLoopData,
+              client_account_id: AccountId| {
+            let client_actor =
+                retrieve_client_actor(node_datas, test_loop_data, &client_account_id);
+            let tip = client_actor.client.chain.head().unwrap();
+
+            // Run this action only once at every block height.
+            if latest_height.get() == tip.height {
+                return;
+            }
+            latest_height.set(tip.height);
+
+            let mut slice = [0u8; 32];
+            slice[0..8].copy_from_slice(&seed.to_le_bytes());
+            slice[8..16].copy_from_slice(&tip.height.to_le_bytes());
+            let mut rng: ChaCha20Rng = SeedableRng::from_seed(slice);
+
+            for _ in 0..NUM_TRANSFERS_PER_BLOCK {
+                let sender = account_ids.choose(&mut rng).unwrap().clone();
+                let receiver = account_ids.choose(&mut rng).unwrap().clone();
+
+                let clients = node_datas
+                    .iter()
+                    .map(|test_data| {
+                        &test_loop_data.get(&test_data.client_sender.actor_handle()).client
+                    })
+                    .collect_vec();
+
+                let anchor_hash = get_anchor_hash(&clients);
+                let nonce = get_next_nonce(&test_loop_data, &node_datas, &sender);
+                let amount = ONE_NEAR * rng.gen_range(1..=10);
+                let tx = SignedTransaction::send_money(
+                    nonce,
+                    sender.clone(),
+                    receiver.clone(),
+                    &create_user_test_signer(&sender).into(),
+                    amount,
+                    anchor_hash,
+                );
+                submit_tx(&node_datas, &client_account_id, tx);
+            }
+            ran_transfers.set(true);
+        },
+    );
+    LoopAction::new(action_fn, succeeded)
+}
+
+/// Returns a loop action that invokes a costly method from a contract
+/// `CALLS_PER_BLOCK_HEIGHT` times per block height.
+///
+/// The account invoking the contract is taken in sequential order from `signer_ids`.
+///
+/// The account receiving the contract call is taken in sequential order from `receiver_ids`.
+pub(crate) fn call_burn_gas_contract(
+    signer_ids: Vec<AccountId>,
+    receiver_ids: Vec<AccountId>,
+    gas_burnt_per_call: Gas,
+    epoch_length: u64,
+) -> LoopAction {
+    const CALLS_PER_BLOCK_HEIGHT: usize = 5;
+    // Set to a value large enough, so that transactions from the past epoch are settled.
+    // Must be less than epoch length, otherwise won't be triggered before the test is finished.
+ let tx_check_blocks_after_resharding = epoch_length - 2; + + let resharding_height = Cell::new(None); + let nonce = Cell::new(102); + let txs = Cell::new(vec![]); + let latest_height = Cell::new(0); + let (checked_transactions, succeeded) = LoopAction::shared_success_flag(); + + let action_fn = Box::new( + move |node_datas: &[TestData], + test_loop_data: &mut TestLoopData, + client_account_id: AccountId| { + let client_actor = + retrieve_client_actor(node_datas, test_loop_data, &client_account_id); + let tip = client_actor.client.chain.head().unwrap(); + + // Run this action only once at every block height. + if latest_height.get() == tip.height { + return; + } + latest_height.set(tip.height); + + // After resharding: wait some blocks and check that all txs have been executed correctly. + if let Some(height) = resharding_height.get() { + if tip.height > height + tx_check_blocks_after_resharding { + for (tx, tx_height) in txs.take() { + let tx_outcome = + client_actor.client.chain.get_partial_transaction_result(&tx); + let status = tx_outcome.as_ref().map(|o| o.status.clone()); + let status = status.unwrap(); + tracing::debug!(target: "test", ?tx_height, ?tx, ?status, "transaction status"); + assert_matches!(status, FinalExecutionStatus::SuccessValue(_)); + } + checked_transactions.set(true); + } + } else { + if next_block_has_new_shard_layout(client_actor.client.epoch_manager.as_ref(), &tip) + { + tracing::debug!(target: "test", height=tip.height, "resharding height set"); + resharding_height.set(Some(tip.height)); + } + } + // Before resharding and one block after: call the test contract a few times per block. + // The objective is to pile up receipts (e.g. delayed). + if tip.height <= resharding_height.get().unwrap_or(1000) + 1 { + for i in 0..CALLS_PER_BLOCK_HEIGHT { + // Note that if the number of signers and receivers is the + // same then the traffic will always flow the same way. It + // would be nice to randomize it a bit. + let signer_id = &signer_ids[i % signer_ids.len()]; + let receiver_id = &receiver_ids[i % receiver_ids.len()]; + let signer: Signer = create_user_test_signer(signer_id).into(); + nonce.set(nonce.get() + 1); + let method_name = "burn_gas_raw".to_owned(); + let burn_gas: u64 = gas_burnt_per_call; + let args = burn_gas.to_le_bytes().to_vec(); + let tx = SignedTransaction::call( + nonce.get(), + signer_id.clone(), + receiver_id.clone(), + &signer, + 1, + method_name, + args, + gas_burnt_per_call + 10 * TGAS, + tip.last_block_hash, + ); + store_and_submit_tx( + &node_datas, + &client_account_id, + &txs, + &signer_id, + &receiver_id, + tip.height, + tx, + ); + } + } + }, + ); + LoopAction::new(action_fn, succeeded) +} + +/// Sends a promise-yield transaction before resharding. Then, if `call_resume` is `true` also sends +/// a yield-resume transaction after resharding, otherwise it lets the promise-yield go into timeout. +/// +/// Each `signer_id` sends transaction to the corresponding `receiver_id`. +/// +/// A few blocks after resharding all transactions outcomes are checked for successful execution. 
+pub(crate) fn call_promise_yield(
+    call_resume: bool,
+    signer_ids: Vec<AccountId>,
+    receiver_ids: Vec<AccountId>,
+) -> LoopAction {
+    let resharding_height: Cell<Option<u64>> = Cell::new(None);
+    let txs = Cell::new(vec![]);
+    let latest_height = Cell::new(0);
+    let promise_txs_sent = Cell::new(false);
+    let nonce = Cell::new(102);
+    let yield_payload = vec![];
+    let (checked_transactions, succeeded) = LoopAction::shared_success_flag();
+
+    let action_fn = Box::new(
+        move |node_datas: &[TestData],
+              test_loop_data: &mut TestLoopData,
+              client_account_id: AccountId| {
+            let client_actor =
+                retrieve_client_actor(node_datas, test_loop_data, &client_account_id);
+            let tip = client_actor.client.chain.head().unwrap();
+
+            // Run this action only once at every block height.
+            if latest_height.get() == tip.height {
+                return;
+            }
+            latest_height.set(tip.height);
+
+            // The operation to be done depends on the current block height in relation to the
+            // resharding height.
+            match (resharding_height.get(), latest_height.get()) {
+                // Resharding happened in the previous block.
+                // Maybe send the resume transaction.
+                (Some(resharding), latest) if latest == resharding + 1 && call_resume => {
+                    for (signer_id, receiver_id) in
+                        signer_ids.clone().into_iter().zip(receiver_ids.clone().into_iter())
+                    {
+                        let signer: Signer = create_user_test_signer(&signer_id).into();
+                        nonce.set(nonce.get() + 1);
+                        let tx = SignedTransaction::call(
+                            nonce.get(),
+                            signer_id.clone(),
+                            receiver_id.clone(),
+                            &signer,
+                            1,
+                            "call_yield_resume_read_data_id_from_storage".to_string(),
+                            yield_payload.clone(),
+                            300 * TGAS,
+                            tip.last_block_hash,
+                        );
+                        store_and_submit_tx(
+                            &node_datas,
+                            &client_account_id,
+                            &txs,
+                            &signer_id,
+                            &receiver_id,
+                            tip.height,
+                            tx,
+                        );
+                    }
+                }
+                // Resharding happened a few blocks in the past.
+                // Check transactions' outcomes.
+                (Some(resharding), latest) if latest == resharding + 4 => {
+                    let txs = txs.take();
+                    assert_ne!(txs.len(), 0);
+                    for (tx, tx_height) in txs {
+                        let tx_outcome =
+                            client_actor.client.chain.get_partial_transaction_result(&tx);
+                        let status = tx_outcome.as_ref().map(|o| o.status.clone());
+                        let status = status.unwrap();
+                        tracing::debug!(target: "test", ?tx_height, ?tx, ?status, "transaction status");
+                        assert_matches!(status, FinalExecutionStatus::SuccessValue(_));
+                    }
+                    checked_transactions.set(true);
+                }
+                (Some(_resharding), _latest) => {}
+                // Resharding didn't happen in the past.
+                (None, _) => {
+                    // Check if resharding will happen in this block.
+                    if next_block_has_new_shard_layout(
+                        client_actor.client.epoch_manager.as_ref(),
+                        &tip,
+                    ) {
+                        tracing::debug!(target: "test", height=tip.height, "resharding height set");
+                        resharding_height.set(Some(tip.height));
+                        return;
+                    }
+                    // Before resharding, send a set of promise transactions, just once.
+                    if promise_txs_sent.get() {
+                        return;
+                    }
+                    for (signer_id, receiver_id) in
+                        signer_ids.clone().into_iter().zip(receiver_ids.clone().into_iter())
+                    {
+                        let signer: Signer = create_user_test_signer(&signer_id).into();
+                        nonce.set(nonce.get() + 1);
+                        let tx = SignedTransaction::call(
+                            nonce.get(),
+                            signer_id.clone(),
+                            receiver_id.clone(),
+                            &signer,
+                            0,
+                            "call_yield_create_return_promise".to_string(),
+                            yield_payload.clone(),
+                            300 * TGAS,
+                            tip.last_block_hash,
+                        );
+                        store_and_submit_tx(
+                            &node_datas,
+                            &client_account_id,
+                            &txs,
+                            &signer_id,
+                            &receiver_id,
+                            tip.height,
+                            tx,
+                        );
+                    }
+                    promise_txs_sent.set(true);
+                }
+            }
+        },
+    );
+    LoopAction::new(action_fn, succeeded)
+}
+
+/// After resharding and gc-period, assert the deleted `account_id`
+/// is still accessible through archival node view client (if available),
+/// and it is not accessible through a regular, RPC node.
+fn check_deleted_account_availability(
+    node_datas: &[TestData],
+    test_loop_data: &mut TestLoopData,
+    archival_id: &Option<AccountId>,
+    rpc_id: &AccountId,
+    account_id: &AccountId,
+    height: u64,
+) {
+    let rpc_node_data = get_node_data(node_datas, &rpc_id);
+    let rpc_view_client_handle = rpc_node_data.view_client_sender.actor_handle();
+
+    let block_reference = BlockReference::BlockId(BlockId::Height(height));
+    let request = QueryRequest::ViewAccount { account_id: account_id.clone() };
+    let msg = Query::new(block_reference, request);
+
+    let rpc_node_result = {
+        let view_client = test_loop_data.get_mut(&rpc_view_client_handle);
+        near_async::messaging::Handler::handle(view_client, msg.clone())
+    };
+    assert_matches!(rpc_node_result, Err(GarbageCollectedBlock { .. }));
+
+    if let Some(archival_id) = archival_id {
+        let archival_node_data = get_node_data(node_datas, &archival_id);
+        let archival_view_client_handle = archival_node_data.view_client_sender.actor_handle();
+        let archival_node_result = {
+            let view_client = test_loop_data.get_mut(&archival_view_client_handle);
+            near_async::messaging::Handler::handle(view_client, msg)
+        };
+        assert_matches!(
+            archival_node_result,
+            Ok(QueryResponse { kind: QueryResponseKind::ViewAccount(_), .. })
+        );
+    }
+}
+
+/// Loop action testing a scenario where a temporary account is deleted after resharding.
+/// After `gc_num_epochs_to_keep` epochs we assert that the account
+/// is not accessible through the RPC node but it is still accessible through the archival node.
+///
+/// The `temporary_account_id` must be a subaccount of the `originator_id`.
+pub(crate) fn temporary_account_during_resharding(
+    archival_id: Option<AccountId>,
+    rpc_id: AccountId,
+    originator_id: AccountId,
+    temporary_account_id: AccountId,
+) -> LoopAction {
+    let latest_height = Cell::new(0);
+    let resharding_height = Cell::new(None);
+    let target_height = Cell::new(None);
+
+    let delete_account_tx_hash = Cell::new(None);
+    let checked_deleted_account = Cell::new(false);
+
+    let (done, succeeded) = LoopAction::shared_success_flag();
+    let action_fn = Box::new(
+        move |node_datas: &[TestData],
+              test_loop_data: &mut TestLoopData,
+              client_account_id: AccountId| {
+            if done.get() {
+                return;
+            }
+
+            let client_actor =
+                retrieve_client_actor(node_datas, test_loop_data, &client_account_id);
+            let tip = client_actor.client.chain.head().unwrap();
+
+            // Run this action only once at every block height.
+ if latest_height.get() == tip.height { + return; + } + latest_height.set(tip.height); + let epoch_length = client_actor.client.config.epoch_length; + let gc_num_epochs_to_keep = client_actor.client.config.gc.gc_num_epochs_to_keep; + + if resharding_height.get().is_none() { + if !this_block_has_new_shard_layout( + client_actor.client.epoch_manager.as_ref(), + &tip, + ) { + return; + } + // Just resharded. Delete the temporary account and set the target height + // high enough so that the delete account transaction will be garbage collected. + let tx_hash = delete_account( + test_loop_data, + node_datas, + &client_account_id, + &temporary_account_id, + &originator_id, + ); + delete_account_tx_hash.set(Some(tx_hash)); + target_height + .set(Some(latest_height.get() + (gc_num_epochs_to_keep + 1) * epoch_length)); + resharding_height.set(Some(latest_height.get())); + } + + // If an epoch passed since resharding, make sure the delete account transaction finished. + if latest_height.get() == resharding_height.get().unwrap() + epoch_length { + check_txs( + test_loop_data, + node_datas, + &client_account_id, + &[delete_account_tx_hash.get().unwrap()], + ); + checked_deleted_account.set(true); + } + + if latest_height.get() < target_height.get().unwrap() { + return; + } + assert!(checked_deleted_account.get()); + // Since gc window passed after the account was deleted, + // check that it is not accessible through regular node, + // but it is accessible through archival node. + check_deleted_account_availability( + node_datas, + test_loop_data, + &archival_id, + &rpc_id, + &temporary_account_id, + resharding_height.get().unwrap(), + ); + done.set(true); + }, + ); + LoopAction::new(action_fn, succeeded) +} diff --git a/integration-tests/src/test_loop/utils/transactions.rs b/integration-tests/src/test_loop/utils/transactions.rs index 9dda074d95f..a79366a7289 100644 --- a/integration-tests/src/test_loop/utils/transactions.rs +++ b/integration-tests/src/test_loop/utils/transactions.rs @@ -171,7 +171,7 @@ pub fn do_create_account( let nonce = get_next_nonce(&env.test_loop.data, &env.datas, originator); let tx = create_account(env, rpc_id, originator, new_account_id, amount, nonce); env.test_loop.run_for(Duration::seconds(5)); - check_txs(&env.test_loop, &env.datas, rpc_id, &[tx]); + check_txs(&env.test_loop.data, &env.datas, rpc_id, &[tx]); } pub fn do_delete_account( @@ -181,9 +181,9 @@ pub fn do_delete_account( beneficiary_id: &AccountId, ) { tracing::info!(target: "test", "Deleting account."); - let tx = delete_account(env, rpc_id, account_id, beneficiary_id); + let tx = delete_account(&env.test_loop.data, &env.datas, rpc_id, account_id, beneficiary_id); env.test_loop.run_for(Duration::seconds(5)); - check_txs(&env.test_loop, &env.datas, rpc_id, &[tx]); + check_txs(&env.test_loop.data, &env.datas, rpc_id, &[tx]); } pub fn do_deploy_contract( @@ -196,7 +196,7 @@ pub fn do_deploy_contract( let nonce = get_next_nonce(&env.test_loop.data, &env.datas, contract_id); let tx = deploy_contract(&mut env.test_loop, &env.datas, rpc_id, contract_id, code, nonce); env.test_loop.run_for(Duration::seconds(2)); - check_txs(&env.test_loop, &env.datas, rpc_id, &[tx]); + check_txs(&env.test_loop.data, &env.datas, rpc_id, &[tx]); } pub fn do_call_contract( @@ -220,7 +220,7 @@ pub fn do_call_contract( nonce, ); env.test_loop.run_for(Duration::seconds(2)); - check_txs(&env.test_loop, &env.datas, rpc_id, &[tx]); + check_txs(&env.test_loop.data, &env.datas, rpc_id, &[tx]); } pub fn create_account( @@ -231,7 +231,7 @@ pub fn 
create_account( amount: u128, nonce: u64, ) -> CryptoHash { - let block_hash = get_shared_block_hash(&env.datas, &env.test_loop); + let block_hash = get_shared_block_hash(&env.datas, &env.test_loop.data); let signer = create_user_test_signer(&originator); let new_signer: Signer = create_user_test_signer(&new_account_id); @@ -252,14 +252,15 @@ pub fn create_account( } pub fn delete_account( - env: &mut TestLoopEnv, + test_loop_data: &TestLoopData, + node_datas: &[TestData], rpc_id: &AccountId, account_id: &AccountId, beneficiary_id: &AccountId, ) -> CryptoHash { let signer: Signer = create_user_test_signer(&account_id).into(); - let nonce = get_next_nonce(&env.test_loop.data, &env.datas, account_id); - let block_hash = get_shared_block_hash(&env.datas, &env.test_loop); + let nonce = get_next_nonce(&test_loop_data, node_datas, account_id); + let block_hash = get_shared_block_hash(node_datas, test_loop_data); let tx = SignedTransaction::delete_account( nonce, @@ -271,7 +272,7 @@ pub fn delete_account( ); let tx_hash = tx.get_hash(); - submit_tx(&env.datas, rpc_id, tx); + submit_tx(node_datas, rpc_id, tx); tracing::debug!(target: "test", ?account_id, ?beneficiary_id, ?tx_hash, "deleted account"); tx_hash } @@ -289,7 +290,7 @@ pub fn deploy_contract( code: Vec, nonce: u64, ) -> CryptoHash { - let block_hash = get_shared_block_hash(node_datas, test_loop); + let block_hash = get_shared_block_hash(node_datas, &test_loop.data); let signer = create_user_test_signer(&contract_id); @@ -314,7 +315,7 @@ pub fn call_contract( args: Vec, nonce: u64, ) -> CryptoHash { - let block_hash = get_shared_block_hash(node_datas, test_loop); + let block_hash = get_shared_block_hash(node_datas, &test_loop.data); let signer = create_user_test_signer(sender_id); let attach_gas = 300 * TGAS; let deposit = 0; @@ -355,12 +356,12 @@ pub fn submit_tx(node_datas: &[TestData], rpc_id: &AccountId, tx: SignedTransact /// Please note that it's important to use an rpc node that tracks all shards. /// Otherwise, the transactions may not be found. pub fn check_txs( - test_loop: &TestLoopV2, + test_loop_data: &TestLoopData, node_datas: &[TestData], rpc_id: &AccountId, txs: &[CryptoHash], ) { - let rpc = rpc_client(test_loop, node_datas, rpc_id); + let rpc = rpc_client(test_loop_data, node_datas, rpc_id); for &tx in txs { let tx_outcome = rpc.chain.get_partial_transaction_result(&tx); @@ -373,21 +374,21 @@ pub fn check_txs( /// Get the client for the provided rpd node account id. fn rpc_client<'a>( - test_loop: &'a TestLoopV2, + test_loop_data: &'a TestLoopData, node_datas: &'a [TestData], rpc_id: &AccountId, ) -> &'a Client { let node_data = get_node_data(node_datas, rpc_id); let client_actor_handle = node_data.client_sender.actor_handle(); - let client_actor = test_loop.data.get(&client_actor_handle); + let client_actor = test_loop_data.get(&client_actor_handle); &client_actor.client } /// Finds a block that all clients have on their chain and return its hash. 
-pub fn get_shared_block_hash(node_datas: &[TestData], test_loop: &TestLoopV2) -> CryptoHash { +pub fn get_shared_block_hash(node_datas: &[TestData], test_loop_data: &TestLoopData) -> CryptoHash { let clients = node_datas .iter() - .map(|data| &test_loop.data.get(&data.client_sender.actor_handle()).client) + .map(|data| &test_loop_data.get(&data.client_sender.actor_handle()).client) .collect_vec(); let (_, block_hash) = clients diff --git a/integration-tests/src/test_loop/utils/trie_sanity.rs b/integration-tests/src/test_loop/utils/trie_sanity.rs index 56e102339bf..ca12fe83b35 100644 --- a/integration-tests/src/test_loop/utils/trie_sanity.rs +++ b/integration-tests/src/test_loop/utils/trie_sanity.rs @@ -338,7 +338,11 @@ fn should_assert_state_sanity( } /// Asserts that all parent shard State is accessible via parent and children shards. -pub fn check_state_shard_uid_mapping_after_resharding(client: &Client, parent_shard_uid: ShardUId) { +pub fn check_state_shard_uid_mapping_after_resharding( + client: &Client, + parent_shard_uid: ShardUId, + allow_negative_refcount: bool, +) { let tip = client.chain.head().unwrap(); let epoch_id = tip.epoch_id; let epoch_config = client.epoch_manager.get_epoch_config(&epoch_id).unwrap(); @@ -356,7 +360,18 @@ pub fn check_state_shard_uid_mapping_after_resharding(client: &Client, parent_sh continue; } let node_hash = CryptoHash::try_from_slice(&key[8..]).unwrap(); - let (value, _) = decode_value_with_rc(&value); + let (value, rc) = decode_value_with_rc(&value); + // It is possible we have delayed receipts leftovers on disk, + // that would result it `MissingTrieValue` if we attempt to read them through the Trie interface. + // TODO(resharding) Remove this when negative refcounts are properly handled. + if rc <= 0 { + assert!(allow_negative_refcount); + // That can only be -1, because delayed receipt can be removed at most twice (by both children). + assert_eq!(rc, -1); + // In case of negative refcount, we only store the refcount, and the value is empty. + assert!(value.is_none()); + continue; + } let parent_value = store.get(parent_shard_uid, &node_hash); // Parent shard data must still be accessible using parent ShardUId. assert_eq!(&parent_value.unwrap()[..], value.unwrap()); From 21b5109af24aae90ef429bbcff0c4f5082fa777b Mon Sep 17 00:00:00 2001 From: Andrea Date: Fri, 3 Jan 2025 10:37:36 +0100 Subject: [PATCH 15/15] fix(resharding): do not fail flat storage resharding if account id does not belong to parent (#12675) If `account_id` of a key does not map to any child (and thus does not belong to the parent in the first place), just log a warning and avoid failing the entire operation. --- chain/chain/src/flat_storage_resharder.rs | 68 +++++++++++++++++++++-- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/chain/chain/src/flat_storage_resharder.rs b/chain/chain/src/flat_storage_resharder.rs index 47e78ca8ebc..a362fbb5a3c 100644 --- a/chain/chain/src/flat_storage_resharder.rs +++ b/chain/chain/src/flat_storage_resharder.rs @@ -7,7 +7,7 @@ use std::sync::{Arc, Mutex}; use near_chain_configs::{MutableConfigValue, ReshardingConfig, ReshardingHandle}; use near_chain_primitives::Error; -use tracing::{debug, error, info}; +use tracing::{debug, error, info, warn}; use crate::resharding::event_type::{ReshardingEventType, ReshardingSplitShardParams}; use crate::resharding::types::{ @@ -1025,9 +1025,10 @@ fn copy_kv_to_child( // Sanity check we are truly writing to one of the expected children shards. 
if new_shard_uid != *left_child_shard && new_shard_uid != *right_child_shard { - let err_msg = "account id doesn't map to any child shard!"; - error!(target: "resharding", ?new_shard_uid, ?left_child_shard, ?right_child_shard, ?shard_layout, ?account_id, err_msg); - return Err(Error::ReshardingError(err_msg.to_string())); + let err_msg = "account id doesn't map to any child shard! - skipping it"; + warn!(target: "resharding", ?new_shard_uid, ?left_child_shard, ?right_child_shard, ?shard_layout, ?account_id, err_msg); + // Do not fail resharding. Just skip this entry. + return Ok(()); } // Add the new flat store entry. store_update.set(new_shard_uid, key, value); @@ -2561,4 +2562,63 @@ mod tests { Ok(FlatStorageStatus::Ready(FlatStorageReadyStatus { flat_head })) ); } + + /// This test asserts that resharding doesn't fail if flat storage iteration goes over an account + /// which is not part of any children shards after the shard layout changes. + #[test] + fn unrelated_account_do_not_fail_splitting() { + init_test_logger(); + let (mut chain, resharder, sender) = + create_chain_resharder_sender::(simple_shard_layout()); + let new_shard_layout = shard_layout_after_split(); + let resharding_event_type = event_type_from_chain_and_layout(&chain, &new_shard_layout); + let ReshardingSplitShardParams { + parent_shard, left_child_shard, right_child_shard, .. + } = match resharding_event_type.clone() { + ReshardingEventType::SplitShard(params) => params, + }; + let flat_store = resharder.runtime.store().flat_store(); + + // Add two blocks on top of genesis. This will make the resharding block (height 0) final. + add_blocks_to_chain( + &mut chain, + 2, + PreviousBlockHeight::ChainHead, + NextBlockHeight::ChainHeadPlusOne, + ); + + // Inject an account which doesn't belong to the parent shard into its flat storage. + let mut store_update = flat_store.store_update(); + let test_value = Some(FlatStateValue::Inlined(vec![0])); + let key = TrieKey::Account { account_id: account!("ab") }; + store_update.set(parent_shard, key.to_vec(), test_value); + store_update.commit().unwrap(); + + // Perform resharding. + assert!(resharder.start_resharding(resharding_event_type, &new_shard_layout).is_ok()); + sender.call_split_shard_task(); + + // Check final status of parent flat storage. + let parent = ShardUId { version: 3, shard_id: 1 }; + assert_eq!(flat_store.get_flat_storage_status(parent), Ok(FlatStorageStatus::Empty)); + assert_eq!(flat_store.iter(parent).count(), 0); + assert!(resharder + .runtime + .get_flat_storage_manager() + .get_flat_storage_for_shard(parent) + .is_none()); + + // Check intermediate status of children flat storages. + // If children reached the catching up state, it means that the split task succeeded. + for child in [left_child_shard, right_child_shard] { + assert_eq!( + flat_store.get_flat_storage_status(child), + Ok(FlatStorageStatus::Resharding(FlatStorageReshardingStatus::CatchingUp( + chain.final_head().unwrap().into() + ))) + ); + // However, the unrelated account should not end up in any child. + assert!(flat_store.get(child, &key.to_vec()).is_ok_and(|val| val.is_none())); + } + } }