-
Notifications
You must be signed in to change notification settings - Fork 664
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(resharding) - congestion info computation #12581
Changes from all commits
2ccd943
bb57b01
ffe30d9
9d04209
78b5bc8
f5ddc63
419e804
8b73a77
0457ee3
e432db8
4eea6ad
752e804
f49a309
f10e0ac
fbcca65
db568c2
d6e3617
c8c683a
bb0f0be
dca5776
d1da511
f1d224f
fa3fef0
130daf9
4424fae
2796ac0
e9517cf
be19d21
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
use std::cell::RefCell; | ||
use std::io; | ||
use std::sync::Arc; | ||
|
||
|
@@ -6,20 +7,23 @@ use super::types::ReshardingSender; | |
use crate::flat_storage_resharder::{FlatStorageResharder, FlatStorageResharderController}; | ||
use crate::types::RuntimeAdapter; | ||
use crate::ChainStoreUpdate; | ||
use itertools::Itertools; | ||
use near_chain_configs::{MutableConfigValue, ReshardingConfig, ReshardingHandle}; | ||
use near_chain_primitives::Error; | ||
use near_epoch_manager::EpochManagerAdapter; | ||
use near_primitives::block::Block; | ||
use near_primitives::challenge::PartialState; | ||
use near_primitives::congestion_info::CongestionInfo; | ||
use near_primitives::hash::CryptoHash; | ||
use near_primitives::shard_layout::{get_block_shard_uid, ShardLayout}; | ||
use near_primitives::types::chunk_extra::ChunkExtra; | ||
use near_store::adapter::{StoreAdapter, StoreUpdateAdapter}; | ||
use near_store::flat::BlockInfo; | ||
use near_store::trie::mem::mem_trie_update::TrackingMode; | ||
use near_store::trie::ops::resharding::RetainMode; | ||
use near_store::trie::outgoing_metadata::ReceiptGroupsQueue; | ||
use near_store::trie::TrieRecorder; | ||
use near_store::{DBCol, ShardTries, ShardUId, Store}; | ||
use near_store::{DBCol, ShardTries, ShardUId, Store, TrieAccess}; | ||
|
||
pub struct ReshardingManager { | ||
store: Store, | ||
|
@@ -183,7 +187,7 @@ impl ReshardingManager { | |
// blocks, the second finalization will crash. | ||
tries.freeze_mem_tries(parent_shard_uid, split_shard_event.children_shards())?; | ||
|
||
let chunk_extra = self.get_chunk_extra(block_hash, &parent_shard_uid)?; | ||
let parent_chunk_extra = self.get_chunk_extra(block_hash, &parent_shard_uid)?; | ||
let boundary_account = split_shard_event.boundary_account; | ||
|
||
let mut trie_store_update = self.store.store_update(); | ||
|
@@ -210,20 +214,48 @@ impl ReshardingManager { | |
let mut mem_tries = mem_tries.write().unwrap(); | ||
let mut trie_recorder = TrieRecorder::new(); | ||
let mode = TrackingMode::RefcountsAndAccesses(&mut trie_recorder); | ||
let mem_trie_update = mem_tries.update(*chunk_extra.state_root(), mode)?; | ||
let mem_trie_update = mem_tries.update(*parent_chunk_extra.state_root(), mode)?; | ||
|
||
let trie_changes = mem_trie_update.retain_split_shard(&boundary_account, retain_mode); | ||
let partial_storage = trie_recorder.recorded_storage(); | ||
let mem_changes = trie_changes.mem_trie_changes.as_ref().unwrap(); | ||
let new_state_root = mem_tries.apply_memtrie_changes(block_height, mem_changes); | ||
drop(mem_tries); | ||
|
||
// Get the congestion info for the child. | ||
let parent_epoch_id = block.header().epoch_id(); | ||
let parent_shard_layout = self.epoch_manager.get_shard_layout(&parent_epoch_id)?; | ||
let parent_state_root = *parent_chunk_extra.state_root(); | ||
let parent_trie = tries.get_trie_for_shard(parent_shard_uid, parent_state_root); | ||
let parent_congestion_info = | ||
parent_chunk_extra.congestion_info().expect("The congestion info must exist!"); | ||
|
||
let trie_recorder = RefCell::new(trie_recorder); | ||
let parent_trie = parent_trie.recording_reads_with_recorder(trie_recorder); | ||
|
||
let child_epoch_id = self.epoch_manager.get_next_epoch_id(block.hash())?; | ||
let child_shard_layout = self.epoch_manager.get_shard_layout(&child_epoch_id)?; | ||
let child_congestion_info = Self::get_child_congestion_info( | ||
&parent_trie, | ||
&parent_shard_layout, | ||
parent_congestion_info, | ||
&child_shard_layout, | ||
new_shard_uid, | ||
retain_mode, | ||
)?; | ||
|
||
let trie_recorder = parent_trie.take_recorder().unwrap(); | ||
let partial_storage = trie_recorder.borrow_mut().recorded_storage(); | ||
let partial_state_len = match &partial_storage.nodes { | ||
PartialState::TrieValues(values) => values.len(), | ||
}; | ||
let mem_changes = trie_changes.mem_trie_changes.as_ref().unwrap(); | ||
let new_state_root = mem_tries.apply_memtrie_changes(block_height, mem_changes); | ||
|
||
// TODO(resharding): set all fields of `ChunkExtra`. Consider stronger | ||
// typing. Clarify where it should happen when `State` and | ||
// `FlatState` update is implemented. | ||
let mut child_chunk_extra = ChunkExtra::clone(&chunk_extra); | ||
let mut child_chunk_extra = ChunkExtra::clone(&parent_chunk_extra); | ||
*child_chunk_extra.state_root_mut() = new_state_root; | ||
*child_chunk_extra.congestion_info_mut().expect("The congestion info must exist!") = | ||
child_congestion_info; | ||
|
||
chain_store_update.save_chunk_extra(block_hash, &new_shard_uid, child_chunk_extra); | ||
chain_store_update.save_state_transition_data( | ||
|
@@ -256,6 +288,100 @@ impl ReshardingManager { | |
Ok(()) | ||
} | ||
|
||
pub fn get_child_congestion_info( | ||
parent_trie: &dyn TrieAccess, | ||
parent_shard_layout: &ShardLayout, | ||
parent_congestion_info: CongestionInfo, | ||
child_shard_layout: &ShardLayout, | ||
child_shard_uid: ShardUId, | ||
retain_mode: RetainMode, | ||
) -> Result<CongestionInfo, Error> { | ||
// Get the congestion info based on the parent shard. | ||
let mut child_congestion_info = Self::get_child_congestion_info_not_finalized( | ||
parent_trie, | ||
&parent_shard_layout, | ||
parent_congestion_info, | ||
retain_mode, | ||
)?; | ||
|
||
// Set the allowed shard based on the child shard. | ||
Self::finalize_allowed_shard( | ||
&child_shard_layout, | ||
child_shard_uid, | ||
&mut child_congestion_info, | ||
)?; | ||
|
||
Ok(child_congestion_info) | ||
} | ||
|
||
// Get the congestion info for the child shard. The congestion info can be | ||
// inferred efficiently from the combination of the parent shard's | ||
// congestion info and the receipt group metadata, that is available in the | ||
// parent shard's trie. | ||
fn get_child_congestion_info_not_finalized( | ||
parent_trie: &dyn TrieAccess, | ||
parent_shard_layout: &ShardLayout, | ||
parent_congestion_info: CongestionInfo, | ||
retain_mode: RetainMode, | ||
) -> Result<CongestionInfo, Error> { | ||
// The left child contains all the delayed and buffered receipts from the | ||
// parent so it should have identical congestion info. | ||
if retain_mode == RetainMode::Left { | ||
return Ok(parent_congestion_info); | ||
} | ||
|
||
// The right child contains all the delayed receipts from the parent but it | ||
// has no buffered receipts. Its info needs to be computed by subtracting | ||
// the parent's buffered receipts from the parent's congestion info. | ||
let mut congestion_info = parent_congestion_info; | ||
for shard_id in parent_shard_layout.shard_ids() { | ||
let receipt_groups = ReceiptGroupsQueue::load(parent_trie, shard_id)?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if there's some way to protect against a scenario where the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's borderline impossible that we wouldn't go through the queue for a full epoch. We do have some assertions in place to check that congestion info is zeroed in the right places iff the queue is empty so at least we'll detect the issue some time later. I don't think delaying is feasible as resharding is scheduled an epoch in advance and we only know about this condition at the last moment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe it'd be possible to bootstrap congestion info? Idk, not ideal either There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, cool idea, it could work but if we're swamped with receipts that could blow up the state witness. I still don't think it's necessary to worry about this case. |
||
let Some(receipt_groups) = receipt_groups else { | ||
continue; | ||
}; | ||
|
||
let bytes = receipt_groups.total_size(); | ||
let gas = receipt_groups.total_gas(); | ||
|
||
congestion_info | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we not have to set There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good sanity check! The |
||
.remove_buffered_receipt_gas(gas) | ||
.expect("Buffered gas must not exceed congestion info buffered gas"); | ||
congestion_info | ||
.remove_receipt_bytes(bytes) | ||
.expect("Buffered size must not exceed congestion info buffered size"); | ||
} | ||
|
||
// The right child does not inherit any buffered receipts. The | ||
// congestion info must match this invariant. | ||
assert_eq!(congestion_info.buffered_receipts_gas(), 0); | ||
|
||
Ok(congestion_info) | ||
} | ||
|
||
fn finalize_allowed_shard( | ||
child_shard_layout: &ShardLayout, | ||
child_shard_uid: ShardUId, | ||
congestion_info: &mut CongestionInfo, | ||
) -> Result<(), Error> { | ||
let all_shards = child_shard_layout.shard_ids().collect_vec(); | ||
let own_shard = child_shard_uid.shard_id(); | ||
let own_shard_index = child_shard_layout | ||
.get_shard_index(own_shard)? | ||
.try_into() | ||
.expect("ShardIndex must fit in u64"); | ||
// Please note that the congestion seed used during resharding is | ||
// different from the one used during normal operation. At runtime the | ||
// seed is set to the sum of shard index and block height. The block | ||
// height isn't easily available on all call sites which is why the | ||
// simplified seed is used. This is valid because it's deterministic and | ||
// resharding is a very rare event. However in a perfect world it should | ||
// be the same. | ||
// TODO - Use proper congestion control seed during resharding. | ||
let congestion_seed = own_shard_index; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be nice to use the same seed as usual (derived from block height), but I guess it doesn't matter, as long as it's deterministic and only on resharding boundaries. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah the issue was that I couldn't get hold of the block height easily on the stateless validation path. Let me add a TODO but for now as you said, as long as it's deterministic and one off it's fine. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Quick question, doesn't this mess up the coordination of allowed shard with respect to other shards? After resharding we split Shard 4 to Shard 5 and Shard 6 (index 4 and 5). If we break the coordination and just assign allowed shard based on Or is this issue just for the one block after resharding and life returns to normal after this block? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There shouldn't be any duplicated allowed shards but please don't ask about missing chunks. If I'm reading the code right, using just the shard index means that each shard will have itself as the allowed shard. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And yeah it's only for one block during resharding. |
||
congestion_info.finalize_allowed_shard(own_shard, &all_shards, congestion_seed); | ||
Ok(()) | ||
} | ||
|
||
// TODO(store): Use proper store interface | ||
fn get_chunk_extra( | ||
&self, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is annoying but needed to prevent a deadlock.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds a bit scary 👀, but I assume it has to be this way
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a bit ugly but I don't think it's scary. Rust is smart enough to know not to reuse this variable anymore. Unless you had some other risk in mind?