From 80a6dfff634835a47dbc52929cd1fe0298de67e6 Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Wed, 18 Dec 2024 12:47:35 +0200 Subject: [PATCH 01/13] spawn thread in handle_partial_encoded_state_witness --- .../partial_witness/partial_witness_actor.rs | 73 +- .../partial_witness_actor_v2.rs | 805 ++++++++++++++++++ chain/client/src/test_utils/setup.rs | 1 + integration-tests/src/test_loop/builder.rs | 1 + integration-tests/src/tests/network/runner.rs | 1 + nearcore/src/lib.rs | 1 + 6 files changed, 870 insertions(+), 12 deletions(-) create mode 100644 chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs index 34f9d0278f6..0f2762e5391 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs @@ -75,6 +75,7 @@ pub struct PartialWitnessActor { /// Same as above for contract deploys. contract_deploys_encoders: ReedSolomonEncoderCache, compile_contracts_spawner: Arc, + partial_witness_spawner: Arc, /// AccountId in the key corresponds to the requester (chunk validator). processed_contract_code_requests: LruCache<(ChunkProductionKey, AccountId), ()>, } @@ -166,6 +167,7 @@ impl PartialWitnessActor { epoch_manager: Arc, runtime: Arc, compile_contracts_spawner: Arc, + partial_witness_spawner: Arc, ) -> Self { let partial_witness_tracker = PartialEncodedStateWitnessTracker::new(client_sender, epoch_manager.clone()); @@ -182,6 +184,7 @@ impl PartialWitnessActor { CONTRACT_DEPLOYS_RATIO_DATA_PARTS, ), compile_contracts_spawner, + partial_witness_spawner, processed_contract_code_requests: LruCache::new( NonZeroUsize::new(PROCESSED_CONTRACT_CODE_REQUESTS_CACHE_SIZE).unwrap(), ), @@ -402,18 +405,45 @@ impl PartialWitnessActor { &mut self, partial_witness: PartialEncodedStateWitness, ) -> Result<(), Error> { - tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessMessage"); - let signer = self.my_validator_signer()?; - // Validate the partial encoded state witness and forward the part to all the chunk validators. - if validate_partial_encoded_state_witness( - self.epoch_manager.as_ref(), - &partial_witness, - &signer, - self.runtime.store(), - )? { - self.forward_state_witness_part(partial_witness)?; - } + let epoch_manager = self.epoch_manager.clone(); + let runtime_adapter = self.runtime.clone(); + + let ChunkProductionKey { shard_id, epoch_id, height_created } = + partial_witness.chunk_production_key(); + + let chunk_producer = self + .epoch_manager + .get_chunk_producer_info(&ChunkProductionKey { epoch_id, height_created, shard_id })? + .take_account_id(); + + // Forward witness part to chunk validators except the validator that produced the chunk and witness. + let target_chunk_validators = self + .epoch_manager + .get_chunk_validator_assignments(&epoch_id, shard_id, height_created)? + .ordered_chunk_validators() + .into_iter() + .filter(|validator| validator != &chunk_producer) + .collect(); + + let network_adapter = self.network_adapter.clone(); + + self.partial_witness_spawner.spawn("handle_partial_encoded_state_witness", move || { + tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessMessage"); + + // Validate the partial encoded state witness and forward the part to all the chunk validators. 
+ if validate_partial_encoded_state_witness( + epoch_manager.as_ref(), + &partial_witness, + &signer, + runtime_adapter.store(), + ).unwrap() { + forward_state_witness_part_v2(partial_witness, + chunk_producer, + target_chunk_validators, + network_adapter).unwrap(); + } + }); Ok(()) } @@ -598,7 +628,7 @@ impl PartialWitnessActor { /// Sends the contract accesses to the same chunk validators /// (except for the chunk producers that track the same shard), - /// which will receive the state witness for the new chunk. + /// which will receive the state witness for the new chunk. fn send_contract_accesses_to_chunk_validators( &self, key: ChunkProductionKey, @@ -799,3 +829,22 @@ fn contracts_cache_contains_contract( let cache_key = get_contract_cache_key(contract_hash.0, &runtime_config.wasm_config); cache.memory_cache().contains(cache_key) || cache.has(&cache_key).is_ok_and(|has| has) } + +/// Sends the witness part to the chunk validators, except the chunk producer that generated the witness part. +fn forward_state_witness_part_v2( + partial_witness: PartialEncodedStateWitness, + chunk_producer: AccountId, + target_chunk_validators: Vec, + network_adapter: PeerManagerAdapter, +) -> Result<(), Error> { + let ChunkProductionKey { shard_id, epoch_id, height_created } = + partial_witness.chunk_production_key(); + + network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::PartialEncodedStateWitnessForward( + target_chunk_validators, + partial_witness, + ), + )); + Ok(()) +} diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs new file mode 100644 index 00000000000..02c50312445 --- /dev/null +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs @@ -0,0 +1,805 @@ +use std::collections::HashSet; +use std::num::NonZeroUsize; +use std::sync::Arc; + +use itertools::Itertools; +use lru::LruCache; +use near_async::futures::{AsyncComputationSpawner, AsyncComputationSpawnerExt}; +use near_async::messaging::{Actor, CanSend, Handler, Sender}; +use near_async::time::Clock; +use near_async::{MultiSend, MultiSenderFrom}; +use near_chain::types::RuntimeAdapter; +use near_chain::Error; +use near_chain_configs::MutableValidatorSigner; +use near_epoch_manager::EpochManagerAdapter; +use near_network::state_witness::{ + ChunkContractAccessesMessage, ChunkStateWitnessAckMessage, ContractCodeRequestMessage, + ContractCodeResponseMessage, PartialEncodedContractDeploysMessage, + PartialEncodedStateWitnessForwardMessage, PartialEncodedStateWitnessMessage, +}; +use near_network::types::{NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest}; +use near_parameters::RuntimeConfig; +use near_performance_metrics_macros::perf; +use near_primitives::reed_solomon::{ReedSolomonEncoder, ReedSolomonEncoderCache}; +use near_primitives::sharding::ShardChunkHeader; +use near_primitives::stateless_validation::contract_distribution::{ + ChunkContractAccesses, ChunkContractDeploys, CodeBytes, CodeHash, ContractCodeRequest, + ContractCodeResponse, ContractUpdates, MainTransitionKey, PartialEncodedContractDeploys, + PartialEncodedContractDeploysPart, +}; +use near_primitives::stateless_validation::partial_witness::PartialEncodedStateWitness; +use near_primitives::stateless_validation::state_witness::{ + ChunkStateWitness, ChunkStateWitnessAck, EncodedChunkStateWitness, +}; +use 
near_primitives::stateless_validation::stored_chunk_state_transition_data::StoredChunkStateTransitionData; +use near_primitives::stateless_validation::ChunkProductionKey; +use near_primitives::types::{AccountId, EpochId, ShardId}; +use near_primitives::validator_signer::ValidatorSigner; +use near_store::adapter::trie_store::TrieStoreAdapter; +use near_store::{DBCol, StorageError, TrieDBStorage, TrieStorage}; +use near_vm_runner::{get_contract_cache_key, ContractCode, ContractRuntimeCache}; +use rand::Rng; + +use crate::client_actor::ClientSenderForPartialWitness; +use crate::metrics; +use crate::stateless_validation::state_witness_tracker::ChunkStateWitnessTracker; +use crate::stateless_validation::validate::{ + validate_chunk_contract_accesses, validate_contract_code_request, + validate_partial_encoded_contract_deploys, validate_partial_encoded_state_witness, +}; + +use super::encoding::{CONTRACT_DEPLOYS_RATIO_DATA_PARTS, WITNESS_RATIO_DATA_PARTS}; +use super::partial_deploys_tracker::PartialEncodedContractDeploysTracker; +use super::partial_witness_tracker::PartialEncodedStateWitnessTracker; +use near_primitives::utils::compression::CompressedData; + +const PROCESSED_CONTRACT_CODE_REQUESTS_CACHE_SIZE: usize = 30; + +pub struct PartialWitnessActorState { + /// Validator signer to sign the state witness. This field is mutable and optional. Use with caution! + /// Lock the value of mutable validator signer for the duration of a request to ensure consistency. + /// Please note that the locked value should not be stored anywhere or passed through the thread boundary. + my_signer: MutableValidatorSigner, + /// Tracks the parts of the state witness sent from chunk producers to chunk validators. + partial_witness_tracker: PartialEncodedStateWitnessTracker, + partial_deploys_tracker: PartialEncodedContractDeploysTracker, + /// Tracks a collection of state witnesses sent from chunk producers to chunk validators. + state_witness_tracker: ChunkStateWitnessTracker, + /// AccountId in the key corresponds to the requester (chunk validator). + processed_contract_code_requests: LruCache<(ChunkProductionKey, AccountId), ()>, +} + +pub struct PartialWitnessActor { + /// Adapter to send messages to the network. + network_adapter: PeerManagerAdapter, + epoch_manager: Arc, + runtime: Arc, + /// Reed Solomon encoder for encoding state witness parts. + /// We keep one wrapper for each length of chunk_validators to avoid re-creating the encoder. + witness_encoders: ReedSolomonEncoderCache, + /// Same as above for contract deploys. 
+ contract_deploys_encoders: ReedSolomonEncoderCache, + compile_contracts_spawner: Arc, + state: PartialWitnessActorState, +} + +impl Actor for PartialWitnessActor {} + +#[derive(actix::Message, Debug)] +#[rtype(result = "()")] +pub struct DistributeStateWitnessRequest { + pub state_witness: ChunkStateWitness, + pub contract_updates: ContractUpdates, + pub main_transition_shard_id: ShardId, +} + +#[derive(Clone, MultiSend, MultiSenderFrom)] +pub struct PartialWitnessSenderForClient { + pub distribute_chunk_state_witness: Sender, +} + +impl Handler for PartialWitnessActor { + #[perf] + fn handle(&mut self, msg: DistributeStateWitnessRequest) { + if let Err(err) = self.handle_distribute_state_witness_request(msg) { + tracing::error!(target: "client", ?err, "Failed to handle distribute chunk state witness request"); + } + } +} + +impl Handler for PartialWitnessActor { + fn handle(&mut self, msg: ChunkStateWitnessAckMessage) { + self.handle_chunk_state_witness_ack(msg.0); + } +} + +impl Handler for PartialWitnessActor { + fn handle(&mut self, msg: PartialEncodedStateWitnessMessage) { + if let Err(err) = self.handle_partial_encoded_state_witness(msg.0) { + tracing::error!(target: "client", ?err, "Failed to handle PartialEncodedStateWitnessMessage"); + } + } +} + +impl Handler for PartialWitnessActor { + fn handle(&mut self, msg: PartialEncodedStateWitnessForwardMessage) { + if let Err(err) = self.handle_partial_encoded_state_witness_forward(msg.0) { + tracing::error!(target: "client", ?err, "Failed to handle PartialEncodedStateWitnessForwardMessage"); + } + } +} + +impl Handler for PartialWitnessActor { + fn handle(&mut self, msg: ChunkContractAccessesMessage) { + if let Err(err) = self.handle_chunk_contract_accesses(msg.0) { + tracing::error!(target: "client", ?err, "Failed to handle ChunkContractAccessesMessage"); + } + } +} + +impl Handler for PartialWitnessActor { + fn handle(&mut self, msg: PartialEncodedContractDeploysMessage) { + if let Err(err) = self.handle_partial_encoded_contract_deploys(msg.0) { + tracing::error!(target: "client", ?err, "Failed to handle PartialEncodedContractDeploysMessage"); + } + } +} + +impl Handler for PartialWitnessActor { + fn handle(&mut self, msg: ContractCodeRequestMessage) { + if let Err(err) = self.handle_contract_code_request(msg.0) { + tracing::error!(target: "client", ?err, "Failed to handle ContractCodeRequestMessage"); + } + } +} + +impl Handler for PartialWitnessActor { + fn handle(&mut self, msg: ContractCodeResponseMessage) { + if let Err(err) = self.handle_contract_code_response(msg.0) { + tracing::error!(target: "client", ?err, "Failed to handle ContractCodeResponseMessage"); + } + } +} + +impl PartialWitnessActor { + pub fn new( + clock: Clock, + network_adapter: PeerManagerAdapter, + client_sender: ClientSenderForPartialWitness, + my_signer: MutableValidatorSigner, + epoch_manager: Arc, + runtime: Arc, + compile_contracts_spawner: Arc, + ) -> Self { + let partial_witness_tracker = + PartialEncodedStateWitnessTracker::new(client_sender, epoch_manager.clone()); + Self { + network_adapter, + my_signer, + epoch_manager, + partial_witness_tracker, + partial_deploys_tracker: PartialEncodedContractDeploysTracker::new(), + state_witness_tracker: ChunkStateWitnessTracker::new(clock), + runtime, + witness_encoders: ReedSolomonEncoderCache::new(WITNESS_RATIO_DATA_PARTS), + contract_deploys_encoders: ReedSolomonEncoderCache::new( + CONTRACT_DEPLOYS_RATIO_DATA_PARTS, + ), + compile_contracts_spawner, + processed_contract_code_requests: LruCache::new( + 
NonZeroUsize::new(PROCESSED_CONTRACT_CODE_REQUESTS_CACHE_SIZE).unwrap(), + ), + } + } + + fn handle_distribute_state_witness_request( + &mut self, + msg: DistributeStateWitnessRequest, + ) -> Result<(), Error> { + let DistributeStateWitnessRequest { + state_witness, + contract_updates: ContractUpdates { contract_accesses, contract_deploys }, + main_transition_shard_id, + } = msg; + + tracing::debug!( + target: "client", + chunk_hash=?state_witness.chunk_header.chunk_hash(), + "distribute_chunk_state_witness", + ); + + // We send the state-witness and contract-updates in the following order: + // 1. We send the hashes of the contract code accessed (if contract code is excluded from witness and any contracts are called) + // before the state witness in order to allow validators to check and request missing contract code, while waiting for witness parts. + // 2. We send the state witness parts to witness-part owners. + // 3. We send the contract deploys parts to other validators (that do not validate the witness in this turn). This is lower priority + // since the newly-deployed contracts will be needed by other validators in later turns. + + let signer = self.my_validator_signer()?; + let key = state_witness.chunk_production_key(); + let chunk_validators = self + .epoch_manager + .get_chunk_validator_assignments(&key.epoch_id, key.shard_id, key.height_created) + .expect("Chunk validators must be defined") + .ordered_chunk_validators(); + + if !contract_accesses.is_empty() { + self.send_contract_accesses_to_chunk_validators( + key.clone(), + contract_accesses, + MainTransitionKey { + block_hash: state_witness.main_state_transition.block_hash, + shard_id: main_transition_shard_id, + }, + &chunk_validators, + &signer, + ); + } + + let witness_bytes = compress_witness(&state_witness)?; + self.send_state_witness_parts( + key.epoch_id, + &state_witness.chunk_header, + witness_bytes, + &chunk_validators, + &signer, + )?; + + if !contract_deploys.is_empty() { + self.send_chunk_contract_deploys_parts(key, contract_deploys)?; + } + + Ok(()) + } + + // Function to generate the parts of the state witness and return them as a tuple of chunk_validator and part. + fn generate_state_witness_parts( + &mut self, + epoch_id: EpochId, + chunk_header: &ShardChunkHeader, + witness_bytes: EncodedChunkStateWitness, + chunk_validators: &[AccountId], + signer: &ValidatorSigner, + ) -> Result, Error> { + tracing::debug!( + target: "client", + chunk_hash=?chunk_header.chunk_hash(), + ?chunk_validators, + "generate_state_witness_parts", + ); + + // Break the state witness into parts using Reed Solomon encoding. + let encoder = self.witness_encoders.entry(chunk_validators.len()); + let (parts, encoded_length) = encoder.encode(&witness_bytes); + + Ok(chunk_validators + .iter() + .zip_eq(parts) + .enumerate() + .map(|(part_ord, (chunk_validator, part))| { + // It's fine to unwrap part here as we just constructed the parts above and we expect + // all of them to be present. + let partial_witness = PartialEncodedStateWitness::new( + epoch_id, + chunk_header.clone(), + part_ord, + part.unwrap().to_vec(), + encoded_length, + signer, + ); + (chunk_validator.clone(), partial_witness) + }) + .collect_vec()) + } + + fn generate_contract_deploys_parts( + &mut self, + key: &ChunkProductionKey, + deploys: ChunkContractDeploys, + ) -> Result, Error> { + let validators = self.ordered_contract_deploys_validators(key)?; + // Note that target validators do not include the chunk producers, and thus in some case + // (eg. 
tests or small networks) there may be no other validators to send the new contracts to. + if validators.is_empty() { + return Ok(vec![]); + } + + let encoder = self.contract_deploys_encoder(validators.len()); + let (parts, encoded_length) = encoder.encode(&deploys); + let signer = self.my_validator_signer()?; + + Ok(validators + .into_iter() + .zip_eq(parts) + .enumerate() + .map(|(part_ord, (validator, part))| { + let partial_deploys = PartialEncodedContractDeploys::new( + key.clone(), + PartialEncodedContractDeploysPart { + part_ord, + data: part.unwrap().to_vec().into_boxed_slice(), + encoded_length, + }, + &signer, + ); + (validator, partial_deploys) + }) + .collect_vec()) + } + + // Break the state witness into parts and send each part to the corresponding chunk validator owner. + // The chunk validator owner will then forward the part to all other chunk validators. + // Each chunk validator would collect the parts and reconstruct the state witness. + fn send_state_witness_parts( + &mut self, + epoch_id: EpochId, + chunk_header: &ShardChunkHeader, + witness_bytes: EncodedChunkStateWitness, + chunk_validators: &[AccountId], + signer: &ValidatorSigner, + ) -> Result<(), Error> { + // Capture these values first, as the sources are consumed before calling record_witness_sent. + let chunk_hash = chunk_header.chunk_hash(); + let witness_size_in_bytes = witness_bytes.size_bytes(); + + // Record time taken to encode the state witness parts. + let shard_id_label = chunk_header.shard_id().to_string(); + let encode_timer = metrics::PARTIAL_WITNESS_ENCODE_TIME + .with_label_values(&[shard_id_label.as_str()]) + .start_timer(); + let validator_witness_tuple = self.generate_state_witness_parts( + epoch_id, + chunk_header, + witness_bytes, + chunk_validators, + signer, + )?; + encode_timer.observe_duration(); + + // Record the witness in order to match the incoming acks for measuring round-trip times. + // See process_chunk_state_witness_ack for the handling of the ack messages. + self.state_witness_tracker.record_witness_sent( + chunk_hash, + witness_size_in_bytes, + validator_witness_tuple.len(), + ); + + // Send the parts to the corresponding chunk validator owners. + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::PartialEncodedStateWitness(validator_witness_tuple), + )); + Ok(()) + } + + /// Sends the witness part to the chunk validators, except the chunk producer that generated the witness part. + fn forward_state_witness_part( + &self, + partial_witness: PartialEncodedStateWitness, + ) -> Result<(), Error> { + let ChunkProductionKey { shard_id, epoch_id, height_created } = + partial_witness.chunk_production_key(); + let chunk_producer = self + .epoch_manager + .get_chunk_producer_info(&ChunkProductionKey { epoch_id, height_created, shard_id })? + .take_account_id(); + + // Forward witness part to chunk validators except the validator that produced the chunk and witness. + let target_chunk_validators = self + .epoch_manager + .get_chunk_validator_assignments(&epoch_id, shard_id, height_created)? + .ordered_chunk_validators() + .into_iter() + .filter(|validator| validator != &chunk_producer) + .collect(); + + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::PartialEncodedStateWitnessForward( + target_chunk_validators, + partial_witness, + ), + )); + Ok(()) + } + + /// Function to handle receiving partial_encoded_state_witness message from chunk producer. 
+ fn handle_partial_encoded_state_witness( + &mut self, + partial_witness: PartialEncodedStateWitness, + ) -> Result<(), Error> { + tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessMessage"); + + let signer = self.my_validator_signer()?; + // Validate the partial encoded state witness and forward the part to all the chunk validators. + if validate_partial_encoded_state_witness( + self.epoch_manager.as_ref(), + &partial_witness, + &signer, + self.runtime.store(), + )? { + self.forward_state_witness_part(partial_witness)?; + } + + Ok(()) + } + + /// Function to handle receiving partial_encoded_state_witness_forward message from chunk producer. + fn handle_partial_encoded_state_witness_forward( + &mut self, + partial_witness: PartialEncodedStateWitness, + ) -> Result<(), Error> { + tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessForwardMessage"); + + let signer = self.my_validator_signer()?; + // Validate the partial encoded state witness and store the partial encoded state witness. + if validate_partial_encoded_state_witness( + self.epoch_manager.as_ref(), + &partial_witness, + &signer, + self.runtime.store(), + )? { + self.partial_witness_tracker.store_partial_encoded_state_witness(partial_witness)?; + } + + Ok(()) + } + + /// Handles partial contract deploy message received from a peer. + /// + /// This message may belong to one of two steps of distributing contract code. In the first step the code is compressed + /// and encoded into parts using Reed Solomon encoding and each part is sent to one of the validators (part owner). + /// See `send_chunk_contract_deploys_parts` for the code implementing this. In the second step each validator (part-owner) + /// forwards the part it receives to other validators. + fn handle_partial_encoded_contract_deploys( + &mut self, + partial_deploys: PartialEncodedContractDeploys, + ) -> Result<(), Error> { + tracing::debug!(target: "client", ?partial_deploys, "Receive PartialEncodedContractDeploys"); + if !validate_partial_encoded_contract_deploys( + self.epoch_manager.as_ref(), + &partial_deploys, + self.runtime.store(), + )? { + return Ok(()); + } + if self.partial_deploys_tracker.already_processed(&partial_deploys) { + return Ok(()); + } + let key = partial_deploys.chunk_production_key().clone(); + let validators = self.ordered_contract_deploys_validators(&key)?; + if validators.is_empty() { + // Note that target validators do not include the chunk producers, and thus in some case + // (eg. tests or small networks) there may be no other validators to send the new contracts to. + // In such case, the message we are handling here should not be sent in the first place, + // unless there is a bug or adversarial behavior that sends the message. 
+ debug_assert!(false, "No target validators, we must not receive this message"); + return Ok(()); + } + + // Forward to other validators if the part received is my part + let signer = self.my_validator_signer()?; + let my_account_id = signer.validator_id(); + let Some(my_part_ord) = validators.iter().position(|validator| validator == my_account_id) + else { + tracing::warn!( + target: "client", + ?key, + "Validator is not a part of contract deploys distribution" + ); + return Ok(()); + }; + if partial_deploys.part().part_ord == my_part_ord { + let other_validators = validators + .iter() + .filter(|&validator| validator != my_account_id) + .cloned() + .collect_vec(); + if !other_validators.is_empty() { + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::PartialEncodedContractDeploys( + other_validators, + partial_deploys.clone(), + ), + )); + } + } + + // Store part + let encoder = self.contract_deploys_encoder(validators.len()); + if let Some(deploys) = self + .partial_deploys_tracker + .store_partial_encoded_contract_deploys(partial_deploys, encoder)? + { + let contracts = match deploys.decompress_contracts() { + Ok(contracts) => contracts, + Err(err) => { + tracing::warn!( + target: "client", + ?err, + ?key, + "Failed to decompress deployed contracts." + ); + return Ok(()); + } + }; + let contract_codes = contracts.into_iter().map(|contract| contract.into()).collect(); + let runtime = self.runtime.clone(); + self.compile_contracts_spawner.spawn("precompile_deployed_contracts", move || { + if let Err(err) = runtime.precompile_contracts(&key.epoch_id, contract_codes) { + tracing::error!( + target: "client", + ?err, + ?key, + "Failed to precompile deployed contracts." + ); + } + }); + } + + Ok(()) + } + + /// Handles the state witness ack message from the chunk validator. + /// It computes the round-trip time between sending the state witness and receiving + /// the ack message and updates the corresponding metric with it. + /// Currently we do not raise an error for handling of witness-ack messages, + /// as it is used only for tracking some networking metrics. + fn handle_chunk_state_witness_ack(&mut self, witness_ack: ChunkStateWitnessAck) { + self.state_witness_tracker.on_witness_ack_received(witness_ack); + } + + /// Handles contract code accesses message from chunk producer. + /// This is sent in parallel to a chunk state witness and contains the hashes + /// of the contract code accessed when applying the previous chunk of the witness. + fn handle_chunk_contract_accesses( + &mut self, + accesses: ChunkContractAccesses, + ) -> Result<(), Error> { + let signer = self.my_validator_signer()?; + if !validate_chunk_contract_accesses( + self.epoch_manager.as_ref(), + &accesses, + &signer, + self.runtime.store(), + )? 
{ + return Ok(()); + } + let key = accesses.chunk_production_key(); + let contracts_cache = self.runtime.compiled_contract_cache(); + let runtime_config = self + .runtime + .get_runtime_config(self.epoch_manager.get_epoch_protocol_version(&key.epoch_id)?)?; + let missing_contract_hashes = HashSet::from_iter( + accesses + .contracts() + .iter() + .filter(|&hash| { + !contracts_cache_contains_contract(contracts_cache, hash, &runtime_config) + }) + .cloned(), + ); + if missing_contract_hashes.is_empty() { + return Ok(()); + } + self.partial_witness_tracker + .store_accessed_contract_hashes(key.clone(), missing_contract_hashes.clone())?; + let random_chunk_producer = { + let mut chunk_producers = self + .epoch_manager + .get_epoch_chunk_producers_for_shard(&key.epoch_id, key.shard_id)?; + chunk_producers.swap_remove(rand::thread_rng().gen_range(0..chunk_producers.len())) + }; + let request = ContractCodeRequest::new( + key.clone(), + missing_contract_hashes, + accesses.main_transition().clone(), + &signer, + ); + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::ContractCodeRequest(random_chunk_producer, request), + )); + Ok(()) + } + + /// Sends the contract accesses to the same chunk validators + /// (except for the chunk producers that track the same shard), + /// which will receive the state witness for the new chunk. + fn send_contract_accesses_to_chunk_validators( + &self, + key: ChunkProductionKey, + contract_accesses: HashSet, + main_transition: MainTransitionKey, + chunk_validators: &[AccountId], + my_signer: &ValidatorSigner, + ) { + let chunk_producers: HashSet = self + .epoch_manager + .get_epoch_chunk_producers_for_shard(&key.epoch_id, key.shard_id) + .expect("Chunk producers must be defined") + .into_iter() + .collect(); + + // Exclude chunk producers that track the same shard from the target list, since they track the state that contains the respective code. + let target_chunk_validators = chunk_validators + .iter() + .filter(|validator| !chunk_producers.contains(*validator)) + .cloned() + .collect(); + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::ChunkContractAccesses( + target_chunk_validators, + ChunkContractAccesses::new(key, contract_accesses, main_transition, my_signer), + ), + )); + } + + /// Retrieves the code for the given contract hashes and distributes them to validator in parts. + /// + /// This implements the first step of distributing contract code to validators where the contract codes + /// are compressed and encoded into parts using Reed Solomon encoding, and then each part is sent to + /// one of the validators (part-owner). Second step of the distribution, where each validator (part-owner) + /// forwards the part it receives is implemented in `handle_partial_encoded_contract_deploys`. + fn send_chunk_contract_deploys_parts( + &mut self, + key: ChunkProductionKey, + contract_codes: Vec, + ) -> Result<(), Error> { + let contracts = contract_codes.into_iter().map(|contract| contract.into()).collect(); + let compressed_deploys = ChunkContractDeploys::compress_contracts(&contracts)?; + let validator_parts = self.generate_contract_deploys_parts(&key, compressed_deploys)?; + for (part_owner, deploys_part) in validator_parts.into_iter() { + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::PartialEncodedContractDeploys(vec![part_owner], deploys_part), + )); + } + Ok(()) + } + + /// Handles contract code requests message from chunk validators. 
+ /// As response to this message, sends the contract code requested to + /// the requesting chunk validator for the given hashes of the contract code. + fn handle_contract_code_request(&mut self, request: ContractCodeRequest) -> Result<(), Error> { + if !validate_contract_code_request( + self.epoch_manager.as_ref(), + &request, + self.runtime.store(), + )? { + return Ok(()); + } + + let key = request.chunk_production_key(); + let processed_requests_key = (key.clone(), request.requester().clone()); + if self.processed_contract_code_requests.contains(&processed_requests_key) { + tracing::warn!( + target: "client", + ?processed_requests_key, + "Contract code request from this account was already processed" + ); + return Ok(()); + } + self.processed_contract_code_requests.push(processed_requests_key, ()); + + let _timer = near_chain::stateless_validation::metrics::PROCESS_CONTRACT_CODE_REQUEST_TIME + .with_label_values(&[&key.shard_id.to_string()]) + .start_timer(); + + let main_transition_key = request.main_transition(); + let Some(transition_data) = + self.runtime.store().get_ser::( + DBCol::StateTransitionData, + &near_primitives::utils::get_block_shard_id( + &main_transition_key.block_hash, + main_transition_key.shard_id, + ), + )? + else { + tracing::warn!( + target: "client", + ?key, + ?main_transition_key, + "Missing state transition data" + ); + return Ok(()); + }; + let valid_accesses: HashSet = + transition_data.contract_accesses().iter().cloned().collect(); + + let storage = TrieDBStorage::new( + TrieStoreAdapter::new(self.runtime.store().clone()), + self.epoch_manager.shard_id_to_uid( + main_transition_key.shard_id, + &self.epoch_manager.get_epoch_id(&main_transition_key.block_hash)?, + )?, + ); + let mut contracts = Vec::new(); + for contract_hash in request.contracts() { + if !valid_accesses.contains(contract_hash) { + tracing::warn!( + target: "client", + ?key, + ?contract_hash, + "Requested contract code was not accessed when applying the chunk" + ); + return Ok(()); + } + match storage.retrieve_raw_bytes(&contract_hash.0) { + Ok(bytes) => contracts.push(CodeBytes(bytes)), + Err(StorageError::MissingTrieValue(_, _)) => { + tracing::warn!( + target: "client", + ?contract_hash, + chunk_production_key = ?key, + "Requested contract hash is not present in the storage" + ); + return Ok(()); + } + Err(err) => return Err(err.into()), + } + } + let response = ContractCodeResponse::encode(key.clone(), &contracts)?; + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::ContractCodeResponse(request.requester().clone(), response), + )); + Ok(()) + } + + /// Handles contract code responses message from chunk producer. 
+ fn handle_contract_code_response( + &mut self, + response: ContractCodeResponse, + ) -> Result<(), Error> { + let key = response.chunk_production_key().clone(); + let contracts = response.decompress_contracts()?; + self.partial_witness_tracker.store_accessed_contract_codes(key, contracts) + } + + fn my_validator_signer(&self) -> Result, Error> { + self.my_signer.get().ok_or_else(|| Error::NotAValidator("not a validator".to_owned())) + } + + fn contract_deploys_encoder(&mut self, validators_count: usize) -> Arc { + self.contract_deploys_encoders.entry(validators_count) + } + + fn ordered_contract_deploys_validators( + &mut self, + key: &ChunkProductionKey, + ) -> Result, Error> { + let chunk_producers = HashSet::::from_iter( + self.epoch_manager.get_epoch_chunk_producers_for_shard(&key.epoch_id, key.shard_id)?, + ); + let mut validators = self + .epoch_manager + .get_epoch_all_validators(&key.epoch_id)? + .into_iter() + .filter(|stake| !chunk_producers.contains(stake.account_id())) + .map(|stake| stake.account_id().clone()) + .collect::>(); + validators.sort(); + Ok(validators) + } +} + +fn compress_witness(witness: &ChunkStateWitness) -> Result { + let shard_id_label = witness.chunk_header.shard_id().to_string(); + let encode_timer = near_chain::stateless_validation::metrics::CHUNK_STATE_WITNESS_ENCODE_TIME + .with_label_values(&[shard_id_label.as_str()]) + .start_timer(); + let (witness_bytes, raw_witness_size) = EncodedChunkStateWitness::encode(witness)?; + encode_timer.observe_duration(); + + near_chain::stateless_validation::metrics::record_witness_size_metrics( + raw_witness_size, + witness_bytes.size_bytes(), + witness, + ); + Ok(witness_bytes) +} + +fn contracts_cache_contains_contract( + cache: &dyn ContractRuntimeCache, + contract_hash: &CodeHash, + runtime_config: &RuntimeConfig, +) -> bool { + let cache_key = get_contract_cache_key(contract_hash.0, &runtime_config.wasm_config); + cache.memory_cache().contains(cache_key) || cache.has(&cache_key).is_ok_and(|has| has) +} diff --git a/chain/client/src/test_utils/setup.rs b/chain/client/src/test_utils/setup.rs index 22ca09bde99..37422fd7597 100644 --- a/chain/client/src/test_utils/setup.rs +++ b/chain/client/src/test_utils/setup.rs @@ -164,6 +164,7 @@ pub fn setup( epoch_manager.clone(), runtime.clone(), Arc::new(RayonAsyncComputationSpawner), + Arc::new(RayonAsyncComputationSpawner), )); let partial_witness_adapter = partial_witness_addr.with_auto_span_context(); diff --git a/integration-tests/src/test_loop/builder.rs b/integration-tests/src/test_loop/builder.rs index ae89b2eb31b..b8064024d16 100644 --- a/integration-tests/src/test_loop/builder.rs +++ b/integration-tests/src/test_loop/builder.rs @@ -724,6 +724,7 @@ impl TestLoopBuilder { epoch_manager.clone(), runtime_adapter.clone(), Arc::new(self.test_loop.async_computation_spawner(|_| Duration::milliseconds(80))), + Arc::new(self.test_loop.async_computation_spawner(|_| Duration::milliseconds(80))), ); let gc_actor = GCActor::new( diff --git a/integration-tests/src/tests/network/runner.rs b/integration-tests/src/tests/network/runner.rs index 8eecd5a410a..a6fe267d014 100644 --- a/integration-tests/src/tests/network/runner.rs +++ b/integration-tests/src/tests/network/runner.rs @@ -148,6 +148,7 @@ fn setup_network_node( epoch_manager, runtime, Arc::new(RayonAsyncComputationSpawner), + Arc::new(RayonAsyncComputationSpawner), )); shards_manager_adapter.bind(shards_manager_actor.with_auto_span_context()); let peer_manager = PeerManagerActor::spawn( diff --git a/nearcore/src/lib.rs 
b/nearcore/src/lib.rs index 0b255586fa1..c7619d6c7b3 100644 --- a/nearcore/src/lib.rs +++ b/nearcore/src/lib.rs @@ -372,6 +372,7 @@ pub fn start_with_config_and_synchronization( epoch_manager.clone(), runtime.clone(), Arc::new(RayonAsyncComputationSpawner), + Arc::new(RayonAsyncComputationSpawner), )); let (_gc_actor, gc_arbiter) = spawn_actix_actor(GCActor::new( From 36136198d86c6857debfbcc6141d7206efe7196b Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Wed, 18 Dec 2024 12:48:00 +0200 Subject: [PATCH 02/13] remove file --- .../partial_witness_actor_v2.rs | 805 ------------------ 1 file changed, 805 deletions(-) delete mode 100644 chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs deleted file mode 100644 index 02c50312445..00000000000 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs +++ /dev/null @@ -1,805 +0,0 @@ -use std::collections::HashSet; -use std::num::NonZeroUsize; -use std::sync::Arc; - -use itertools::Itertools; -use lru::LruCache; -use near_async::futures::{AsyncComputationSpawner, AsyncComputationSpawnerExt}; -use near_async::messaging::{Actor, CanSend, Handler, Sender}; -use near_async::time::Clock; -use near_async::{MultiSend, MultiSenderFrom}; -use near_chain::types::RuntimeAdapter; -use near_chain::Error; -use near_chain_configs::MutableValidatorSigner; -use near_epoch_manager::EpochManagerAdapter; -use near_network::state_witness::{ - ChunkContractAccessesMessage, ChunkStateWitnessAckMessage, ContractCodeRequestMessage, - ContractCodeResponseMessage, PartialEncodedContractDeploysMessage, - PartialEncodedStateWitnessForwardMessage, PartialEncodedStateWitnessMessage, -}; -use near_network::types::{NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest}; -use near_parameters::RuntimeConfig; -use near_performance_metrics_macros::perf; -use near_primitives::reed_solomon::{ReedSolomonEncoder, ReedSolomonEncoderCache}; -use near_primitives::sharding::ShardChunkHeader; -use near_primitives::stateless_validation::contract_distribution::{ - ChunkContractAccesses, ChunkContractDeploys, CodeBytes, CodeHash, ContractCodeRequest, - ContractCodeResponse, ContractUpdates, MainTransitionKey, PartialEncodedContractDeploys, - PartialEncodedContractDeploysPart, -}; -use near_primitives::stateless_validation::partial_witness::PartialEncodedStateWitness; -use near_primitives::stateless_validation::state_witness::{ - ChunkStateWitness, ChunkStateWitnessAck, EncodedChunkStateWitness, -}; -use near_primitives::stateless_validation::stored_chunk_state_transition_data::StoredChunkStateTransitionData; -use near_primitives::stateless_validation::ChunkProductionKey; -use near_primitives::types::{AccountId, EpochId, ShardId}; -use near_primitives::validator_signer::ValidatorSigner; -use near_store::adapter::trie_store::TrieStoreAdapter; -use near_store::{DBCol, StorageError, TrieDBStorage, TrieStorage}; -use near_vm_runner::{get_contract_cache_key, ContractCode, ContractRuntimeCache}; -use rand::Rng; - -use crate::client_actor::ClientSenderForPartialWitness; -use crate::metrics; -use crate::stateless_validation::state_witness_tracker::ChunkStateWitnessTracker; -use crate::stateless_validation::validate::{ - validate_chunk_contract_accesses, validate_contract_code_request, - validate_partial_encoded_contract_deploys, 
validate_partial_encoded_state_witness, -}; - -use super::encoding::{CONTRACT_DEPLOYS_RATIO_DATA_PARTS, WITNESS_RATIO_DATA_PARTS}; -use super::partial_deploys_tracker::PartialEncodedContractDeploysTracker; -use super::partial_witness_tracker::PartialEncodedStateWitnessTracker; -use near_primitives::utils::compression::CompressedData; - -const PROCESSED_CONTRACT_CODE_REQUESTS_CACHE_SIZE: usize = 30; - -pub struct PartialWitnessActorState { - /// Validator signer to sign the state witness. This field is mutable and optional. Use with caution! - /// Lock the value of mutable validator signer for the duration of a request to ensure consistency. - /// Please note that the locked value should not be stored anywhere or passed through the thread boundary. - my_signer: MutableValidatorSigner, - /// Tracks the parts of the state witness sent from chunk producers to chunk validators. - partial_witness_tracker: PartialEncodedStateWitnessTracker, - partial_deploys_tracker: PartialEncodedContractDeploysTracker, - /// Tracks a collection of state witnesses sent from chunk producers to chunk validators. - state_witness_tracker: ChunkStateWitnessTracker, - /// AccountId in the key corresponds to the requester (chunk validator). - processed_contract_code_requests: LruCache<(ChunkProductionKey, AccountId), ()>, -} - -pub struct PartialWitnessActor { - /// Adapter to send messages to the network. - network_adapter: PeerManagerAdapter, - epoch_manager: Arc, - runtime: Arc, - /// Reed Solomon encoder for encoding state witness parts. - /// We keep one wrapper for each length of chunk_validators to avoid re-creating the encoder. - witness_encoders: ReedSolomonEncoderCache, - /// Same as above for contract deploys. - contract_deploys_encoders: ReedSolomonEncoderCache, - compile_contracts_spawner: Arc, - state: PartialWitnessActorState, -} - -impl Actor for PartialWitnessActor {} - -#[derive(actix::Message, Debug)] -#[rtype(result = "()")] -pub struct DistributeStateWitnessRequest { - pub state_witness: ChunkStateWitness, - pub contract_updates: ContractUpdates, - pub main_transition_shard_id: ShardId, -} - -#[derive(Clone, MultiSend, MultiSenderFrom)] -pub struct PartialWitnessSenderForClient { - pub distribute_chunk_state_witness: Sender, -} - -impl Handler for PartialWitnessActor { - #[perf] - fn handle(&mut self, msg: DistributeStateWitnessRequest) { - if let Err(err) = self.handle_distribute_state_witness_request(msg) { - tracing::error!(target: "client", ?err, "Failed to handle distribute chunk state witness request"); - } - } -} - -impl Handler for PartialWitnessActor { - fn handle(&mut self, msg: ChunkStateWitnessAckMessage) { - self.handle_chunk_state_witness_ack(msg.0); - } -} - -impl Handler for PartialWitnessActor { - fn handle(&mut self, msg: PartialEncodedStateWitnessMessage) { - if let Err(err) = self.handle_partial_encoded_state_witness(msg.0) { - tracing::error!(target: "client", ?err, "Failed to handle PartialEncodedStateWitnessMessage"); - } - } -} - -impl Handler for PartialWitnessActor { - fn handle(&mut self, msg: PartialEncodedStateWitnessForwardMessage) { - if let Err(err) = self.handle_partial_encoded_state_witness_forward(msg.0) { - tracing::error!(target: "client", ?err, "Failed to handle PartialEncodedStateWitnessForwardMessage"); - } - } -} - -impl Handler for PartialWitnessActor { - fn handle(&mut self, msg: ChunkContractAccessesMessage) { - if let Err(err) = self.handle_chunk_contract_accesses(msg.0) { - tracing::error!(target: "client", ?err, "Failed to handle 
ChunkContractAccessesMessage"); - } - } -} - -impl Handler for PartialWitnessActor { - fn handle(&mut self, msg: PartialEncodedContractDeploysMessage) { - if let Err(err) = self.handle_partial_encoded_contract_deploys(msg.0) { - tracing::error!(target: "client", ?err, "Failed to handle PartialEncodedContractDeploysMessage"); - } - } -} - -impl Handler for PartialWitnessActor { - fn handle(&mut self, msg: ContractCodeRequestMessage) { - if let Err(err) = self.handle_contract_code_request(msg.0) { - tracing::error!(target: "client", ?err, "Failed to handle ContractCodeRequestMessage"); - } - } -} - -impl Handler for PartialWitnessActor { - fn handle(&mut self, msg: ContractCodeResponseMessage) { - if let Err(err) = self.handle_contract_code_response(msg.0) { - tracing::error!(target: "client", ?err, "Failed to handle ContractCodeResponseMessage"); - } - } -} - -impl PartialWitnessActor { - pub fn new( - clock: Clock, - network_adapter: PeerManagerAdapter, - client_sender: ClientSenderForPartialWitness, - my_signer: MutableValidatorSigner, - epoch_manager: Arc, - runtime: Arc, - compile_contracts_spawner: Arc, - ) -> Self { - let partial_witness_tracker = - PartialEncodedStateWitnessTracker::new(client_sender, epoch_manager.clone()); - Self { - network_adapter, - my_signer, - epoch_manager, - partial_witness_tracker, - partial_deploys_tracker: PartialEncodedContractDeploysTracker::new(), - state_witness_tracker: ChunkStateWitnessTracker::new(clock), - runtime, - witness_encoders: ReedSolomonEncoderCache::new(WITNESS_RATIO_DATA_PARTS), - contract_deploys_encoders: ReedSolomonEncoderCache::new( - CONTRACT_DEPLOYS_RATIO_DATA_PARTS, - ), - compile_contracts_spawner, - processed_contract_code_requests: LruCache::new( - NonZeroUsize::new(PROCESSED_CONTRACT_CODE_REQUESTS_CACHE_SIZE).unwrap(), - ), - } - } - - fn handle_distribute_state_witness_request( - &mut self, - msg: DistributeStateWitnessRequest, - ) -> Result<(), Error> { - let DistributeStateWitnessRequest { - state_witness, - contract_updates: ContractUpdates { contract_accesses, contract_deploys }, - main_transition_shard_id, - } = msg; - - tracing::debug!( - target: "client", - chunk_hash=?state_witness.chunk_header.chunk_hash(), - "distribute_chunk_state_witness", - ); - - // We send the state-witness and contract-updates in the following order: - // 1. We send the hashes of the contract code accessed (if contract code is excluded from witness and any contracts are called) - // before the state witness in order to allow validators to check and request missing contract code, while waiting for witness parts. - // 2. We send the state witness parts to witness-part owners. - // 3. We send the contract deploys parts to other validators (that do not validate the witness in this turn). This is lower priority - // since the newly-deployed contracts will be needed by other validators in later turns. 
- - let signer = self.my_validator_signer()?; - let key = state_witness.chunk_production_key(); - let chunk_validators = self - .epoch_manager - .get_chunk_validator_assignments(&key.epoch_id, key.shard_id, key.height_created) - .expect("Chunk validators must be defined") - .ordered_chunk_validators(); - - if !contract_accesses.is_empty() { - self.send_contract_accesses_to_chunk_validators( - key.clone(), - contract_accesses, - MainTransitionKey { - block_hash: state_witness.main_state_transition.block_hash, - shard_id: main_transition_shard_id, - }, - &chunk_validators, - &signer, - ); - } - - let witness_bytes = compress_witness(&state_witness)?; - self.send_state_witness_parts( - key.epoch_id, - &state_witness.chunk_header, - witness_bytes, - &chunk_validators, - &signer, - )?; - - if !contract_deploys.is_empty() { - self.send_chunk_contract_deploys_parts(key, contract_deploys)?; - } - - Ok(()) - } - - // Function to generate the parts of the state witness and return them as a tuple of chunk_validator and part. - fn generate_state_witness_parts( - &mut self, - epoch_id: EpochId, - chunk_header: &ShardChunkHeader, - witness_bytes: EncodedChunkStateWitness, - chunk_validators: &[AccountId], - signer: &ValidatorSigner, - ) -> Result, Error> { - tracing::debug!( - target: "client", - chunk_hash=?chunk_header.chunk_hash(), - ?chunk_validators, - "generate_state_witness_parts", - ); - - // Break the state witness into parts using Reed Solomon encoding. - let encoder = self.witness_encoders.entry(chunk_validators.len()); - let (parts, encoded_length) = encoder.encode(&witness_bytes); - - Ok(chunk_validators - .iter() - .zip_eq(parts) - .enumerate() - .map(|(part_ord, (chunk_validator, part))| { - // It's fine to unwrap part here as we just constructed the parts above and we expect - // all of them to be present. - let partial_witness = PartialEncodedStateWitness::new( - epoch_id, - chunk_header.clone(), - part_ord, - part.unwrap().to_vec(), - encoded_length, - signer, - ); - (chunk_validator.clone(), partial_witness) - }) - .collect_vec()) - } - - fn generate_contract_deploys_parts( - &mut self, - key: &ChunkProductionKey, - deploys: ChunkContractDeploys, - ) -> Result, Error> { - let validators = self.ordered_contract_deploys_validators(key)?; - // Note that target validators do not include the chunk producers, and thus in some case - // (eg. tests or small networks) there may be no other validators to send the new contracts to. - if validators.is_empty() { - return Ok(vec![]); - } - - let encoder = self.contract_deploys_encoder(validators.len()); - let (parts, encoded_length) = encoder.encode(&deploys); - let signer = self.my_validator_signer()?; - - Ok(validators - .into_iter() - .zip_eq(parts) - .enumerate() - .map(|(part_ord, (validator, part))| { - let partial_deploys = PartialEncodedContractDeploys::new( - key.clone(), - PartialEncodedContractDeploysPart { - part_ord, - data: part.unwrap().to_vec().into_boxed_slice(), - encoded_length, - }, - &signer, - ); - (validator, partial_deploys) - }) - .collect_vec()) - } - - // Break the state witness into parts and send each part to the corresponding chunk validator owner. - // The chunk validator owner will then forward the part to all other chunk validators. - // Each chunk validator would collect the parts and reconstruct the state witness. 
- fn send_state_witness_parts( - &mut self, - epoch_id: EpochId, - chunk_header: &ShardChunkHeader, - witness_bytes: EncodedChunkStateWitness, - chunk_validators: &[AccountId], - signer: &ValidatorSigner, - ) -> Result<(), Error> { - // Capture these values first, as the sources are consumed before calling record_witness_sent. - let chunk_hash = chunk_header.chunk_hash(); - let witness_size_in_bytes = witness_bytes.size_bytes(); - - // Record time taken to encode the state witness parts. - let shard_id_label = chunk_header.shard_id().to_string(); - let encode_timer = metrics::PARTIAL_WITNESS_ENCODE_TIME - .with_label_values(&[shard_id_label.as_str()]) - .start_timer(); - let validator_witness_tuple = self.generate_state_witness_parts( - epoch_id, - chunk_header, - witness_bytes, - chunk_validators, - signer, - )?; - encode_timer.observe_duration(); - - // Record the witness in order to match the incoming acks for measuring round-trip times. - // See process_chunk_state_witness_ack for the handling of the ack messages. - self.state_witness_tracker.record_witness_sent( - chunk_hash, - witness_size_in_bytes, - validator_witness_tuple.len(), - ); - - // Send the parts to the corresponding chunk validator owners. - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::PartialEncodedStateWitness(validator_witness_tuple), - )); - Ok(()) - } - - /// Sends the witness part to the chunk validators, except the chunk producer that generated the witness part. - fn forward_state_witness_part( - &self, - partial_witness: PartialEncodedStateWitness, - ) -> Result<(), Error> { - let ChunkProductionKey { shard_id, epoch_id, height_created } = - partial_witness.chunk_production_key(); - let chunk_producer = self - .epoch_manager - .get_chunk_producer_info(&ChunkProductionKey { epoch_id, height_created, shard_id })? - .take_account_id(); - - // Forward witness part to chunk validators except the validator that produced the chunk and witness. - let target_chunk_validators = self - .epoch_manager - .get_chunk_validator_assignments(&epoch_id, shard_id, height_created)? - .ordered_chunk_validators() - .into_iter() - .filter(|validator| validator != &chunk_producer) - .collect(); - - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::PartialEncodedStateWitnessForward( - target_chunk_validators, - partial_witness, - ), - )); - Ok(()) - } - - /// Function to handle receiving partial_encoded_state_witness message from chunk producer. - fn handle_partial_encoded_state_witness( - &mut self, - partial_witness: PartialEncodedStateWitness, - ) -> Result<(), Error> { - tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessMessage"); - - let signer = self.my_validator_signer()?; - // Validate the partial encoded state witness and forward the part to all the chunk validators. - if validate_partial_encoded_state_witness( - self.epoch_manager.as_ref(), - &partial_witness, - &signer, - self.runtime.store(), - )? { - self.forward_state_witness_part(partial_witness)?; - } - - Ok(()) - } - - /// Function to handle receiving partial_encoded_state_witness_forward message from chunk producer. 
- fn handle_partial_encoded_state_witness_forward( - &mut self, - partial_witness: PartialEncodedStateWitness, - ) -> Result<(), Error> { - tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessForwardMessage"); - - let signer = self.my_validator_signer()?; - // Validate the partial encoded state witness and store the partial encoded state witness. - if validate_partial_encoded_state_witness( - self.epoch_manager.as_ref(), - &partial_witness, - &signer, - self.runtime.store(), - )? { - self.partial_witness_tracker.store_partial_encoded_state_witness(partial_witness)?; - } - - Ok(()) - } - - /// Handles partial contract deploy message received from a peer. - /// - /// This message may belong to one of two steps of distributing contract code. In the first step the code is compressed - /// and encoded into parts using Reed Solomon encoding and each part is sent to one of the validators (part owner). - /// See `send_chunk_contract_deploys_parts` for the code implementing this. In the second step each validator (part-owner) - /// forwards the part it receives to other validators. - fn handle_partial_encoded_contract_deploys( - &mut self, - partial_deploys: PartialEncodedContractDeploys, - ) -> Result<(), Error> { - tracing::debug!(target: "client", ?partial_deploys, "Receive PartialEncodedContractDeploys"); - if !validate_partial_encoded_contract_deploys( - self.epoch_manager.as_ref(), - &partial_deploys, - self.runtime.store(), - )? { - return Ok(()); - } - if self.partial_deploys_tracker.already_processed(&partial_deploys) { - return Ok(()); - } - let key = partial_deploys.chunk_production_key().clone(); - let validators = self.ordered_contract_deploys_validators(&key)?; - if validators.is_empty() { - // Note that target validators do not include the chunk producers, and thus in some case - // (eg. tests or small networks) there may be no other validators to send the new contracts to. - // In such case, the message we are handling here should not be sent in the first place, - // unless there is a bug or adversarial behavior that sends the message. - debug_assert!(false, "No target validators, we must not receive this message"); - return Ok(()); - } - - // Forward to other validators if the part received is my part - let signer = self.my_validator_signer()?; - let my_account_id = signer.validator_id(); - let Some(my_part_ord) = validators.iter().position(|validator| validator == my_account_id) - else { - tracing::warn!( - target: "client", - ?key, - "Validator is not a part of contract deploys distribution" - ); - return Ok(()); - }; - if partial_deploys.part().part_ord == my_part_ord { - let other_validators = validators - .iter() - .filter(|&validator| validator != my_account_id) - .cloned() - .collect_vec(); - if !other_validators.is_empty() { - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::PartialEncodedContractDeploys( - other_validators, - partial_deploys.clone(), - ), - )); - } - } - - // Store part - let encoder = self.contract_deploys_encoder(validators.len()); - if let Some(deploys) = self - .partial_deploys_tracker - .store_partial_encoded_contract_deploys(partial_deploys, encoder)? - { - let contracts = match deploys.decompress_contracts() { - Ok(contracts) => contracts, - Err(err) => { - tracing::warn!( - target: "client", - ?err, - ?key, - "Failed to decompress deployed contracts." 
- ); - return Ok(()); - } - }; - let contract_codes = contracts.into_iter().map(|contract| contract.into()).collect(); - let runtime = self.runtime.clone(); - self.compile_contracts_spawner.spawn("precompile_deployed_contracts", move || { - if let Err(err) = runtime.precompile_contracts(&key.epoch_id, contract_codes) { - tracing::error!( - target: "client", - ?err, - ?key, - "Failed to precompile deployed contracts." - ); - } - }); - } - - Ok(()) - } - - /// Handles the state witness ack message from the chunk validator. - /// It computes the round-trip time between sending the state witness and receiving - /// the ack message and updates the corresponding metric with it. - /// Currently we do not raise an error for handling of witness-ack messages, - /// as it is used only for tracking some networking metrics. - fn handle_chunk_state_witness_ack(&mut self, witness_ack: ChunkStateWitnessAck) { - self.state_witness_tracker.on_witness_ack_received(witness_ack); - } - - /// Handles contract code accesses message from chunk producer. - /// This is sent in parallel to a chunk state witness and contains the hashes - /// of the contract code accessed when applying the previous chunk of the witness. - fn handle_chunk_contract_accesses( - &mut self, - accesses: ChunkContractAccesses, - ) -> Result<(), Error> { - let signer = self.my_validator_signer()?; - if !validate_chunk_contract_accesses( - self.epoch_manager.as_ref(), - &accesses, - &signer, - self.runtime.store(), - )? { - return Ok(()); - } - let key = accesses.chunk_production_key(); - let contracts_cache = self.runtime.compiled_contract_cache(); - let runtime_config = self - .runtime - .get_runtime_config(self.epoch_manager.get_epoch_protocol_version(&key.epoch_id)?)?; - let missing_contract_hashes = HashSet::from_iter( - accesses - .contracts() - .iter() - .filter(|&hash| { - !contracts_cache_contains_contract(contracts_cache, hash, &runtime_config) - }) - .cloned(), - ); - if missing_contract_hashes.is_empty() { - return Ok(()); - } - self.partial_witness_tracker - .store_accessed_contract_hashes(key.clone(), missing_contract_hashes.clone())?; - let random_chunk_producer = { - let mut chunk_producers = self - .epoch_manager - .get_epoch_chunk_producers_for_shard(&key.epoch_id, key.shard_id)?; - chunk_producers.swap_remove(rand::thread_rng().gen_range(0..chunk_producers.len())) - }; - let request = ContractCodeRequest::new( - key.clone(), - missing_contract_hashes, - accesses.main_transition().clone(), - &signer, - ); - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::ContractCodeRequest(random_chunk_producer, request), - )); - Ok(()) - } - - /// Sends the contract accesses to the same chunk validators - /// (except for the chunk producers that track the same shard), - /// which will receive the state witness for the new chunk. - fn send_contract_accesses_to_chunk_validators( - &self, - key: ChunkProductionKey, - contract_accesses: HashSet, - main_transition: MainTransitionKey, - chunk_validators: &[AccountId], - my_signer: &ValidatorSigner, - ) { - let chunk_producers: HashSet = self - .epoch_manager - .get_epoch_chunk_producers_for_shard(&key.epoch_id, key.shard_id) - .expect("Chunk producers must be defined") - .into_iter() - .collect(); - - // Exclude chunk producers that track the same shard from the target list, since they track the state that contains the respective code. 
- let target_chunk_validators = chunk_validators - .iter() - .filter(|validator| !chunk_producers.contains(*validator)) - .cloned() - .collect(); - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::ChunkContractAccesses( - target_chunk_validators, - ChunkContractAccesses::new(key, contract_accesses, main_transition, my_signer), - ), - )); - } - - /// Retrieves the code for the given contract hashes and distributes them to validator in parts. - /// - /// This implements the first step of distributing contract code to validators where the contract codes - /// are compressed and encoded into parts using Reed Solomon encoding, and then each part is sent to - /// one of the validators (part-owner). Second step of the distribution, where each validator (part-owner) - /// forwards the part it receives is implemented in `handle_partial_encoded_contract_deploys`. - fn send_chunk_contract_deploys_parts( - &mut self, - key: ChunkProductionKey, - contract_codes: Vec, - ) -> Result<(), Error> { - let contracts = contract_codes.into_iter().map(|contract| contract.into()).collect(); - let compressed_deploys = ChunkContractDeploys::compress_contracts(&contracts)?; - let validator_parts = self.generate_contract_deploys_parts(&key, compressed_deploys)?; - for (part_owner, deploys_part) in validator_parts.into_iter() { - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::PartialEncodedContractDeploys(vec![part_owner], deploys_part), - )); - } - Ok(()) - } - - /// Handles contract code requests message from chunk validators. - /// As response to this message, sends the contract code requested to - /// the requesting chunk validator for the given hashes of the contract code. - fn handle_contract_code_request(&mut self, request: ContractCodeRequest) -> Result<(), Error> { - if !validate_contract_code_request( - self.epoch_manager.as_ref(), - &request, - self.runtime.store(), - )? { - return Ok(()); - } - - let key = request.chunk_production_key(); - let processed_requests_key = (key.clone(), request.requester().clone()); - if self.processed_contract_code_requests.contains(&processed_requests_key) { - tracing::warn!( - target: "client", - ?processed_requests_key, - "Contract code request from this account was already processed" - ); - return Ok(()); - } - self.processed_contract_code_requests.push(processed_requests_key, ()); - - let _timer = near_chain::stateless_validation::metrics::PROCESS_CONTRACT_CODE_REQUEST_TIME - .with_label_values(&[&key.shard_id.to_string()]) - .start_timer(); - - let main_transition_key = request.main_transition(); - let Some(transition_data) = - self.runtime.store().get_ser::( - DBCol::StateTransitionData, - &near_primitives::utils::get_block_shard_id( - &main_transition_key.block_hash, - main_transition_key.shard_id, - ), - )? 
- else { - tracing::warn!( - target: "client", - ?key, - ?main_transition_key, - "Missing state transition data" - ); - return Ok(()); - }; - let valid_accesses: HashSet = - transition_data.contract_accesses().iter().cloned().collect(); - - let storage = TrieDBStorage::new( - TrieStoreAdapter::new(self.runtime.store().clone()), - self.epoch_manager.shard_id_to_uid( - main_transition_key.shard_id, - &self.epoch_manager.get_epoch_id(&main_transition_key.block_hash)?, - )?, - ); - let mut contracts = Vec::new(); - for contract_hash in request.contracts() { - if !valid_accesses.contains(contract_hash) { - tracing::warn!( - target: "client", - ?key, - ?contract_hash, - "Requested contract code was not accessed when applying the chunk" - ); - return Ok(()); - } - match storage.retrieve_raw_bytes(&contract_hash.0) { - Ok(bytes) => contracts.push(CodeBytes(bytes)), - Err(StorageError::MissingTrieValue(_, _)) => { - tracing::warn!( - target: "client", - ?contract_hash, - chunk_production_key = ?key, - "Requested contract hash is not present in the storage" - ); - return Ok(()); - } - Err(err) => return Err(err.into()), - } - } - let response = ContractCodeResponse::encode(key.clone(), &contracts)?; - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::ContractCodeResponse(request.requester().clone(), response), - )); - Ok(()) - } - - /// Handles contract code responses message from chunk producer. - fn handle_contract_code_response( - &mut self, - response: ContractCodeResponse, - ) -> Result<(), Error> { - let key = response.chunk_production_key().clone(); - let contracts = response.decompress_contracts()?; - self.partial_witness_tracker.store_accessed_contract_codes(key, contracts) - } - - fn my_validator_signer(&self) -> Result, Error> { - self.my_signer.get().ok_or_else(|| Error::NotAValidator("not a validator".to_owned())) - } - - fn contract_deploys_encoder(&mut self, validators_count: usize) -> Arc { - self.contract_deploys_encoders.entry(validators_count) - } - - fn ordered_contract_deploys_validators( - &mut self, - key: &ChunkProductionKey, - ) -> Result, Error> { - let chunk_producers = HashSet::::from_iter( - self.epoch_manager.get_epoch_chunk_producers_for_shard(&key.epoch_id, key.shard_id)?, - ); - let mut validators = self - .epoch_manager - .get_epoch_all_validators(&key.epoch_id)? 
- .into_iter() - .filter(|stake| !chunk_producers.contains(stake.account_id())) - .map(|stake| stake.account_id().clone()) - .collect::>(); - validators.sort(); - Ok(validators) - } -} - -fn compress_witness(witness: &ChunkStateWitness) -> Result { - let shard_id_label = witness.chunk_header.shard_id().to_string(); - let encode_timer = near_chain::stateless_validation::metrics::CHUNK_STATE_WITNESS_ENCODE_TIME - .with_label_values(&[shard_id_label.as_str()]) - .start_timer(); - let (witness_bytes, raw_witness_size) = EncodedChunkStateWitness::encode(witness)?; - encode_timer.observe_duration(); - - near_chain::stateless_validation::metrics::record_witness_size_metrics( - raw_witness_size, - witness_bytes.size_bytes(), - witness, - ); - Ok(witness_bytes) -} - -fn contracts_cache_contains_contract( - cache: &dyn ContractRuntimeCache, - contract_hash: &CodeHash, - runtime_config: &RuntimeConfig, -) -> bool { - let cache_key = get_contract_cache_key(contract_hash.0, &runtime_config.wasm_config); - cache.memory_cache().contains(cache_key) || cache.has(&cache_key).is_ok_and(|has| has) -} From c12d23b158bc8b3f7ccf98fad191f0660bb13176 Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Wed, 18 Dec 2024 12:51:45 +0200 Subject: [PATCH 03/13] clippy --- .../partial_witness/partial_witness_actor.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs index 0f2762e5391..45f527c1281 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs @@ -371,7 +371,7 @@ impl PartialWitnessActor { } /// Sends the witness part to the chunk validators, except the chunk producer that generated the witness part. - fn forward_state_witness_part( + fn _forward_state_witness_part( &self, partial_witness: PartialEncodedStateWitness, ) -> Result<(), Error> { @@ -439,7 +439,6 @@ impl PartialWitnessActor { runtime_adapter.store(), ).unwrap() { forward_state_witness_part_v2(partial_witness, - chunk_producer, target_chunk_validators, network_adapter).unwrap(); } @@ -833,13 +832,9 @@ fn contracts_cache_contains_contract( /// Sends the witness part to the chunk validators, except the chunk producer that generated the witness part. 
fn forward_state_witness_part_v2( partial_witness: PartialEncodedStateWitness, - chunk_producer: AccountId, target_chunk_validators: Vec, network_adapter: PeerManagerAdapter, ) -> Result<(), Error> { - let ChunkProductionKey { shard_id, epoch_id, height_created } = - partial_witness.chunk_production_key(); - network_adapter.send(PeerManagerMessageRequest::NetworkRequests( NetworkRequests::PartialEncodedStateWitnessForward( target_chunk_validators, From 1de100c9c738f8a36da77cca9267b8e238afccc7 Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Thu, 19 Dec 2024 11:56:07 +0200 Subject: [PATCH 04/13] modify handle_partial_encoded_state_witness_forward --- .../partial_witness/partial_witness_actor.rs | 99 ++++++++++++++----- 1 file changed, 74 insertions(+), 25 deletions(-) diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs index 45f527c1281..f556d64085e 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs @@ -1,6 +1,6 @@ use std::collections::HashSet; use std::num::NonZeroUsize; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use itertools::Itertools; use lru::LruCache; @@ -65,7 +65,7 @@ pub struct PartialWitnessActor { epoch_manager: Arc, runtime: Arc, /// Tracks the parts of the state witness sent from chunk producers to chunk validators. - partial_witness_tracker: PartialEncodedStateWitnessTracker, + partial_witness_tracker: Arc>, partial_deploys_tracker: PartialEncodedContractDeploysTracker, /// Tracks a collection of state witnesses sent from chunk producers to chunk validators. state_witness_tracker: ChunkStateWitnessTracker, @@ -169,8 +169,10 @@ impl PartialWitnessActor { compile_contracts_spawner: Arc, partial_witness_spawner: Arc, ) -> Self { - let partial_witness_tracker = - PartialEncodedStateWitnessTracker::new(client_sender, epoch_manager.clone()); + let partial_witness_tracker = Arc::new(Mutex::new(PartialEncodedStateWitnessTracker::new( + client_sender, + epoch_manager.clone(), + ))); Self { network_adapter, my_signer, @@ -405,6 +407,7 @@ impl PartialWitnessActor { &mut self, partial_witness: PartialEncodedStateWitness, ) -> Result<(), Error> { + tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessMessage"); let signer = self.my_validator_signer()?; let epoch_manager = self.epoch_manager.clone(); let runtime_adapter = self.runtime.clone(); @@ -429,18 +432,34 @@ impl PartialWitnessActor { let network_adapter = self.network_adapter.clone(); self.partial_witness_spawner.spawn("handle_partial_encoded_state_witness", move || { - tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessMessage"); - // Validate the partial encoded state witness and forward the part to all the chunk validators. 
- if validate_partial_encoded_state_witness( + let validation = validate_partial_encoded_state_witness( epoch_manager.as_ref(), &partial_witness, &signer, runtime_adapter.store(), - ).unwrap() { - forward_state_witness_part_v2(partial_witness, - target_chunk_validators, - network_adapter).unwrap(); + ); + match validation { + Ok(true) => { + forward_state_witness_part_v2( + partial_witness, + target_chunk_validators, + network_adapter, + ); + } + Ok(false) => { + tracing::warn!( + target: "client", + "Received partial encoded state witness that is not valid" + ); + } + Err(err) => { + tracing::warn!( + target: "client", + "Encountered error during validation: {}", + err + ); + } } }); @@ -455,15 +474,42 @@ impl PartialWitnessActor { tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessForwardMessage"); let signer = self.my_validator_signer()?; - // Validate the partial encoded state witness and store the partial encoded state witness. - if validate_partial_encoded_state_witness( - self.epoch_manager.as_ref(), - &partial_witness, - &signer, - self.runtime.store(), - )? { - self.partial_witness_tracker.store_partial_encoded_state_witness(partial_witness)?; - } + let partial_witness_tracker = self.partial_witness_tracker.clone(); + let epoch_manager = self.epoch_manager.clone(); + let runtime_adapter = self.runtime.clone(); + self.partial_witness_spawner.spawn( + "handle_partial_encoded_state_witness_forward", + move || { + // Validate the partial encoded state witness and store the partial encoded state witness. + let validation = validate_partial_encoded_state_witness( + epoch_manager.as_ref(), + &partial_witness, + &signer, + runtime_adapter.store(), + ); + match validation { + Ok(true) => { + let mut partial_witness_tracker = partial_witness_tracker.lock().unwrap(); + partial_witness_tracker + .store_partial_encoded_state_witness(partial_witness) + .unwrap(); + } + Ok(false) => { + tracing::warn!( + target: "client", + "Received partial encoded state witness that is not valid" + ); + } + Err(err) => { + tracing::warn!( + target: "client", + "Encountered error during validation: {}", + err + ); + } + } + }, + ); Ok(()) } @@ -605,8 +651,11 @@ impl PartialWitnessActor { if missing_contract_hashes.is_empty() { return Ok(()); } - self.partial_witness_tracker - .store_accessed_contract_hashes(key.clone(), missing_contract_hashes.clone())?; + { + let mut partial_witness_tracker = self.partial_witness_tracker.lock().unwrap(); + partial_witness_tracker + .store_accessed_contract_hashes(key.clone(), missing_contract_hashes.clone())?; + } let random_chunk_producer = { let mut chunk_producers = self .epoch_manager @@ -774,7 +823,8 @@ impl PartialWitnessActor { ) -> Result<(), Error> { let key = response.chunk_production_key().clone(); let contracts = response.decompress_contracts()?; - self.partial_witness_tracker.store_accessed_contract_codes(key, contracts) + let mut partial_witness_tracker = self.partial_witness_tracker.lock().unwrap(); + partial_witness_tracker.store_accessed_contract_codes(key, contracts) } fn my_validator_signer(&self) -> Result, Error> { @@ -834,12 +884,11 @@ fn forward_state_witness_part_v2( partial_witness: PartialEncodedStateWitness, target_chunk_validators: Vec, network_adapter: PeerManagerAdapter, -) -> Result<(), Error> { +) { network_adapter.send(PeerManagerMessageRequest::NetworkRequests( NetworkRequests::PartialEncodedStateWitnessForward( target_chunk_validators, partial_witness, ), )); - Ok(()) } From 
3f63d0335566562ffbeec4122309cccc1a907768 Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Fri, 20 Dec 2024 09:57:31 +0200 Subject: [PATCH 05/13] . --- .../partial_witness/partial_witness_actor.rs | 21 +-- .../partial_witness_tracker.rs | 130 ++++++++++-------- utils/near-cache/src/sync.rs | 8 ++ 3 files changed, 85 insertions(+), 74 deletions(-) diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs index f556d64085e..e5b6ae56e85 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs @@ -1,6 +1,6 @@ use std::collections::HashSet; use std::num::NonZeroUsize; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use itertools::Itertools; use lru::LruCache; @@ -65,7 +65,7 @@ pub struct PartialWitnessActor { epoch_manager: Arc, runtime: Arc, /// Tracks the parts of the state witness sent from chunk producers to chunk validators. - partial_witness_tracker: Arc>, + partial_witness_tracker: Arc, partial_deploys_tracker: PartialEncodedContractDeploysTracker, /// Tracks a collection of state witnesses sent from chunk producers to chunk validators. state_witness_tracker: ChunkStateWitnessTracker, @@ -169,10 +169,8 @@ impl PartialWitnessActor { compile_contracts_spawner: Arc, partial_witness_spawner: Arc, ) -> Self { - let partial_witness_tracker = Arc::new(Mutex::new(PartialEncodedStateWitnessTracker::new( - client_sender, - epoch_manager.clone(), - ))); + let partial_witness_tracker = + Arc::new(PartialEncodedStateWitnessTracker::new(client_sender, epoch_manager.clone())); Self { network_adapter, my_signer, @@ -489,7 +487,6 @@ impl PartialWitnessActor { ); match validation { Ok(true) => { - let mut partial_witness_tracker = partial_witness_tracker.lock().unwrap(); partial_witness_tracker .store_partial_encoded_state_witness(partial_witness) .unwrap(); @@ -651,11 +648,8 @@ impl PartialWitnessActor { if missing_contract_hashes.is_empty() { return Ok(()); } - { - let mut partial_witness_tracker = self.partial_witness_tracker.lock().unwrap(); - partial_witness_tracker - .store_accessed_contract_hashes(key.clone(), missing_contract_hashes.clone())?; - } + self.partial_witness_tracker + .store_accessed_contract_hashes(key.clone(), missing_contract_hashes.clone())?; let random_chunk_producer = { let mut chunk_producers = self .epoch_manager @@ -823,8 +817,7 @@ impl PartialWitnessActor { ) -> Result<(), Error> { let key = response.chunk_production_key().clone(); let contracts = response.decompress_contracts()?; - let mut partial_witness_tracker = self.partial_witness_tracker.lock().unwrap(); - partial_witness_tracker.store_accessed_contract_codes(key, contracts) + self.partial_witness_tracker.store_accessed_contract_codes(key, contracts) } fn my_validator_signer(&self) -> Result, Error> { diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_tracker.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_tracker.rs index 4beb1b1ff3d..89d2091be8b 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_tracker.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_tracker.rs @@ -1,10 +1,11 @@ use std::collections::HashSet; use std::num::NonZeroUsize; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use lru::LruCache; use near_async::messaging::CanSend; use 
near_async::time::Instant; +use near_cache::SyncLruCache; use near_chain::chain::ChunkStateWitnessMessage; use near_chain::Error; use near_epoch_manager::EpochManagerAdapter; @@ -308,13 +309,13 @@ pub struct PartialEncodedStateWitnessTracker { /// Epoch manager to get the set of chunk validators epoch_manager: Arc, /// Keeps track of state witness parts received from chunk producers. - parts_cache: LruCache, + parts_cache: Arc>>, /// Keeps track of the already decoded witnesses. This is needed /// to protect chunk validator from processing the same witness multiple /// times. - processed_witnesses: LruCache, + processed_witnesses: Arc>, /// Reed Solomon encoder for decoding state witness parts. - encoders: ReedSolomonEncoderCache, + encoders: Arc>, } impl PartialEncodedStateWitnessTracker { @@ -325,16 +326,16 @@ impl PartialEncodedStateWitnessTracker { Self { client_sender, epoch_manager, - parts_cache: LruCache::new(NonZeroUsize::new(WITNESS_PARTS_CACHE_SIZE).unwrap()), - processed_witnesses: LruCache::new( - NonZeroUsize::new(PROCESSED_WITNESSES_CACHE_SIZE).unwrap(), - ), - encoders: ReedSolomonEncoderCache::new(WITNESS_RATIO_DATA_PARTS), + parts_cache: Arc::new(Mutex::new(LruCache::new( + NonZeroUsize::new(WITNESS_PARTS_CACHE_SIZE).unwrap(), + ))), + processed_witnesses: Arc::new(SyncLruCache::new(PROCESSED_WITNESSES_CACHE_SIZE)), + encoders: Arc::new(Mutex::new(ReedSolomonEncoderCache::new(WITNESS_RATIO_DATA_PARTS))), } } pub fn store_partial_encoded_state_witness( - &mut self, + &self, partial_witness: PartialEncodedStateWitness, ) -> Result<(), Error> { tracing::debug!(target: "client", ?partial_witness, "store_partial_encoded_state_witness"); @@ -345,7 +346,7 @@ impl PartialEncodedStateWitnessTracker { } pub fn store_accessed_contract_hashes( - &mut self, + &self, key: ChunkProductionKey, hashes: HashSet, ) -> Result<(), Error> { @@ -355,7 +356,7 @@ impl PartialEncodedStateWitnessTracker { } pub fn store_accessed_contract_codes( - &mut self, + &self, key: ChunkProductionKey, codes: Vec, ) -> Result<(), Error> { @@ -365,7 +366,7 @@ impl PartialEncodedStateWitnessTracker { } fn process_update( - &mut self, + &self, key: ChunkProductionKey, create_if_not_exists: bool, update: CacheUpdate, @@ -382,72 +383,80 @@ impl PartialEncodedStateWitnessTracker { if create_if_not_exists { self.maybe_insert_new_entry_in_parts_cache(&key); } - let Some(entry) = self.parts_cache.get_mut(&key) else { - return Ok(()); - }; - if let Some((decode_result, accessed_contracts)) = entry.update(update) { - // Record the time taken from receiving first part to decoding partial witness. - let time_to_last_part = Instant::now().signed_duration_since(entry.created_at); - metrics::PARTIAL_WITNESS_TIME_TO_LAST_PART - .with_label_values(&[key.shard_id.to_string().as_str()]) - .observe(time_to_last_part.as_seconds_f64()); - - self.parts_cache.pop(&key); - self.processed_witnesses.push(key.clone(), ()); - - let encoded_witness = match decode_result { - Ok(encoded_chunk_state_witness) => encoded_chunk_state_witness, - Err(err) => { - // We ideally never expect the decoding to fail. In case it does, we received a bad part - // from the chunk producer. - tracing::error!( - target: "client", - ?err, - shard_id = ?key.shard_id, - height_created = key.height_created, - "Failed to reed solomon decode witness parts. Maybe malicious or corrupt data." 
- ); + { + let mut parts_cache = self.parts_cache.lock().unwrap(); + let Some(entry) = parts_cache.get_mut(&key) else { + return Ok(()); + }; + if let Some((decode_result, accessed_contracts)) = entry.update(update) { + // Record the time taken from receiving first part to decoding partial witness. + let time_to_last_part = Instant::now().signed_duration_since(entry.created_at); + metrics::PARTIAL_WITNESS_TIME_TO_LAST_PART + .with_label_values(&[key.shard_id.to_string().as_str()]) + .observe(time_to_last_part.as_seconds_f64()); + + parts_cache.pop(&key); + drop(parts_cache); + self.processed_witnesses.push(key.clone(), ()); + + let encoded_witness = match decode_result { + Ok(encoded_chunk_state_witness) => encoded_chunk_state_witness, + Err(err) => { + // We ideally never expect the decoding to fail. In case it does, we received a bad part + // from the chunk producer. + tracing::error!( + target: "client", + ?err, + shard_id = ?key.shard_id, + height_created = key.height_created, + "Failed to reed solomon decode witness parts. Maybe malicious or corrupt data." + ); + return Err(Error::InvalidPartialChunkStateWitness(format!( + "Failed to reed solomon decode witness parts: {err}", + ))); + } + }; + + let (mut witness, raw_witness_size) = + self.decode_state_witness(&encoded_witness)?; + if witness.chunk_production_key() != key { return Err(Error::InvalidPartialChunkStateWitness(format!( - "Failed to reed solomon decode witness parts: {err}", + "Decoded witness key {:?} doesn't match partial witness {:?}", + witness.chunk_production_key(), + key, ))); } - }; - - let (mut witness, raw_witness_size) = self.decode_state_witness(&encoded_witness)?; - if witness.chunk_production_key() != key { - return Err(Error::InvalidPartialChunkStateWitness(format!( - "Decoded witness key {:?} doesn't match partial witness {:?}", - witness.chunk_production_key(), - key, - ))); - } - // Merge accessed contracts into the main transition's partial state. - let PartialState::TrieValues(values) = &mut witness.main_state_transition.base_state; - values.extend(accessed_contracts.into_iter().map(|code| code.0.into())); + // Merge accessed contracts into the main transition's partial state. + let PartialState::TrieValues(values) = + &mut witness.main_state_transition.base_state; + values.extend(accessed_contracts.into_iter().map(|code| code.0.into())); - tracing::debug!(target: "client", ?key, "Sending encoded witness to client."); - self.client_sender.send(ChunkStateWitnessMessage { witness, raw_witness_size }); + tracing::debug!(target: "client", ?key, "Sending encoded witness to client."); + self.client_sender.send(ChunkStateWitnessMessage { witness, raw_witness_size }); + } } self.record_total_parts_cache_size_metric(); Ok(()) } - fn get_encoder(&mut self, key: &ChunkProductionKey) -> Result, Error> { + fn get_encoder(&self, key: &ChunkProductionKey) -> Result, Error> { // The expected number of parts for the Reed Solomon encoding is the number of chunk validators. let num_parts = self .epoch_manager .get_chunk_validator_assignments(&key.epoch_id, key.shard_id, key.height_created)? .len(); - Ok(self.encoders.entry(num_parts)) + let mut encoders = self.encoders.lock().unwrap(); + Ok(encoders.entry(num_parts)) } // Function to insert a new entry into the cache for the chunk hash if it does not already exist // We additionally check if an evicted entry has been fully decoded and processed. 
- fn maybe_insert_new_entry_in_parts_cache(&mut self, key: &ChunkProductionKey) { - if !self.parts_cache.contains(key) { + fn maybe_insert_new_entry_in_parts_cache(&self, key: &ChunkProductionKey) { + let mut parts_cache = self.parts_cache.lock().unwrap(); + if !parts_cache.contains(key) { if let Some((evicted_key, evicted_entry)) = - self.parts_cache.push(key.clone(), CacheEntry::new(key.shard_id)) + parts_cache.push(key.clone(), CacheEntry::new(key.shard_id)) { tracing::warn!( target: "client", @@ -461,7 +470,8 @@ impl PartialEncodedStateWitnessTracker { } fn record_total_parts_cache_size_metric(&self) { - let total_size: usize = self.parts_cache.iter().map(|(_, entry)| entry.total_size()).sum(); + let parts_cache = self.parts_cache.lock().unwrap(); + let total_size: usize = parts_cache.iter().map(|(_, entry)| entry.total_size()).sum(); metrics::PARTIAL_WITNESS_CACHE_SIZE.set(total_size as f64); } diff --git a/utils/near-cache/src/sync.rs b/utils/near-cache/src/sync.rs index 4b971c9a655..a0f2db2f384 100644 --- a/utils/near-cache/src/sync.rs +++ b/utils/near-cache/src/sync.rs @@ -30,6 +30,14 @@ where self.inner.lock().unwrap().is_empty() } + pub fn contains(&self, key: &K) -> bool { + self.inner.lock().unwrap().contains(key) + } + + pub fn push(&self, key: K, value: V) -> Option<(K, V)> { + self.inner.lock().unwrap().push(key, value) + } + /// Return the value of they key in the cache otherwise computes the value and inserts it into /// the cache. If the key is already in the cache, they get moved to the head of /// the LRU list. From b5102e8abee3ad951cbc49efe9cc807d33e7b511 Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Fri, 3 Jan 2025 11:17:59 +0200 Subject: [PATCH 06/13] cleanup --- .../partial_witness/partial_witness_actor.rs | 69 ++++--------------- utils/near-cache/src/sync.rs | 4 ++ 2 files changed, 16 insertions(+), 57 deletions(-) diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs index 20933ce69f7..cb6d9907645 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs @@ -368,36 +368,6 @@ impl PartialWitnessActor { )); } - /// Sends the witness part to the chunk validators, except the chunk producer that generated the witness part. - fn _forward_state_witness_part( - &self, - partial_witness: PartialEncodedStateWitness, - ) -> Result<(), Error> { - let ChunkProductionKey { shard_id, epoch_id, height_created } = - partial_witness.chunk_production_key(); - let chunk_producer = self - .epoch_manager - .get_chunk_producer_info(&ChunkProductionKey { epoch_id, height_created, shard_id })? - .take_account_id(); - - // Forward witness part to chunk validators except the validator that produced the chunk and witness. - let target_chunk_validators = self - .epoch_manager - .get_chunk_validator_assignments(&epoch_id, shard_id, height_created)? - .ordered_chunk_validators() - .into_iter() - .filter(|validator| validator != &chunk_producer) - .collect(); - - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::PartialEncodedStateWitnessForward( - target_chunk_validators, - partial_witness, - ), - )); - Ok(()) - } - /// Function to handle receiving partial_encoded_state_witness message from chunk producer. 
fn handle_partial_encoded_state_witness( &mut self, @@ -429,24 +399,24 @@ impl PartialWitnessActor { self.partial_witness_spawner.spawn("handle_partial_encoded_state_witness", move || { // Validate the partial encoded state witness and forward the part to all the chunk validators. - let validation = validate_partial_encoded_state_witness( + match validate_partial_encoded_state_witness( epoch_manager.as_ref(), &partial_witness, &signer, runtime_adapter.store(), - ); - match validation { + ) { Ok(true) => { - forward_state_witness_part_v2( - partial_witness, - target_chunk_validators, - network_adapter, - ); + network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::PartialEncodedStateWitnessForward( + target_chunk_validators, + partial_witness, + ), + )); } Ok(false) => { tracing::warn!( target: "client", - "Received partial encoded state witness that is not valid" + "Received invalid partial encoded state witness" ); } Err(err) => { @@ -477,13 +447,12 @@ impl PartialWitnessActor { "handle_partial_encoded_state_witness_forward", move || { // Validate the partial encoded state witness and store the partial encoded state witness. - let validation = validate_partial_encoded_state_witness( + match validate_partial_encoded_state_witness( epoch_manager.as_ref(), &partial_witness, &signer, runtime_adapter.store(), - ); - match validation { + ) { Ok(true) => { partial_witness_tracker .store_partial_encoded_state_witness(partial_witness) @@ -492,7 +461,7 @@ impl PartialWitnessActor { Ok(false) => { tracing::warn!( target: "client", - "Received partial encoded state witness that is not valid" + "Received invalid partial encoded state witness" ); } Err(err) => { @@ -869,17 +838,3 @@ fn contracts_cache_contains_contract( let cache_key = get_contract_cache_key(contract_hash.0, &runtime_config.wasm_config); cache.memory_cache().contains(cache_key) || cache.has(&cache_key).is_ok_and(|has| has) } - -/// Sends the witness part to the chunk validators, except the chunk producer that generated the witness part. -fn forward_state_witness_part_v2( - partial_witness: PartialEncodedStateWitness, - target_chunk_validators: Vec, - network_adapter: PeerManagerAdapter, -) { - network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::PartialEncodedStateWitnessForward( - target_chunk_validators, - partial_witness, - ), - )); -} diff --git a/utils/near-cache/src/sync.rs b/utils/near-cache/src/sync.rs index a0f2db2f384..8cec6a2cc33 100644 --- a/utils/near-cache/src/sync.rs +++ b/utils/near-cache/src/sync.rs @@ -30,10 +30,14 @@ where self.inner.lock().unwrap().is_empty() } + /// Returns true if the cache contains the key and false otherwise. pub fn contains(&self, key: &K) -> bool { self.inner.lock().unwrap().contains(key) } + /// Pushes a key-value pair into the cache. If an entry with key `k` already exists in + /// the cache or another cache entry is removed (due to the lru's capacity), + /// then it returns the old entry's key-value pair. Otherwise, returns `None`. 
pub fn push(&self, key: K, value: V) -> Option<(K, V)> { self.inner.lock().unwrap().push(key, value) } From 53246b4d1f66cda02f1d54edb99b412f90f3bf0a Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Fri, 3 Jan 2025 12:33:01 +0200 Subject: [PATCH 07/13] log error if store_partial_encoded_state_witness fails --- .../partial_witness/partial_witness_actor.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs index cb6d9907645..c8ae40851b1 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs @@ -454,9 +454,9 @@ impl PartialWitnessActor { runtime_adapter.store(), ) { Ok(true) => { - partial_witness_tracker - .store_partial_encoded_state_witness(partial_witness) - .unwrap(); + if let Err(err) = partial_witness_tracker.store_partial_encoded_state_witness(partial_witness) { + tracing::error!(target: "client", "Failed to store partial encoded state witness: {}", err); + } } Ok(false) => { tracing::warn!( From 445ff013ee99a856979f3ebbf56d807784b58c43 Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Tue, 7 Jan 2025 11:21:12 +0200 Subject: [PATCH 08/13] address comments --- .../partial_witness_tracker.rs | 121 +++++++++--------- 1 file changed, 60 insertions(+), 61 deletions(-) diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_tracker.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_tracker.rs index 89d2091be8b..b8e9de73a8e 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_tracker.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_tracker.rs @@ -309,13 +309,13 @@ pub struct PartialEncodedStateWitnessTracker { /// Epoch manager to get the set of chunk validators epoch_manager: Arc, /// Keeps track of state witness parts received from chunk producers. - parts_cache: Arc>>, + parts_cache: Mutex>, /// Keeps track of the already decoded witnesses. This is needed /// to protect chunk validator from processing the same witness multiple /// times. - processed_witnesses: Arc>, + processed_witnesses: SyncLruCache, /// Reed Solomon encoder for decoding state witness parts. 
- encoders: Arc>, + encoders: Mutex, } impl PartialEncodedStateWitnessTracker { @@ -326,11 +326,11 @@ impl PartialEncodedStateWitnessTracker { Self { client_sender, epoch_manager, - parts_cache: Arc::new(Mutex::new(LruCache::new( + parts_cache: Mutex::new(LruCache::new( NonZeroUsize::new(WITNESS_PARTS_CACHE_SIZE).unwrap(), - ))), - processed_witnesses: Arc::new(SyncLruCache::new(PROCESSED_WITNESSES_CACHE_SIZE)), - encoders: Arc::new(Mutex::new(ReedSolomonEncoderCache::new(WITNESS_RATIO_DATA_PARTS))), + )), + processed_witnesses: SyncLruCache::new(PROCESSED_WITNESSES_CACHE_SIZE), + encoders: Mutex::new(ReedSolomonEncoderCache::new(WITNESS_RATIO_DATA_PARTS)), } } @@ -383,60 +383,65 @@ impl PartialEncodedStateWitnessTracker { if create_if_not_exists { self.maybe_insert_new_entry_in_parts_cache(&key); } + let mut parts_cache = self.parts_cache.lock().unwrap(); + let Some(entry) = parts_cache.get_mut(&key) else { + return Ok(()); + }; + let total_size: usize = if let Some((decode_result, accessed_contracts)) = + entry.update(update) { - let mut parts_cache = self.parts_cache.lock().unwrap(); - let Some(entry) = parts_cache.get_mut(&key) else { - return Ok(()); - }; - if let Some((decode_result, accessed_contracts)) = entry.update(update) { - // Record the time taken from receiving first part to decoding partial witness. - let time_to_last_part = Instant::now().signed_duration_since(entry.created_at); - metrics::PARTIAL_WITNESS_TIME_TO_LAST_PART - .with_label_values(&[key.shard_id.to_string().as_str()]) - .observe(time_to_last_part.as_seconds_f64()); - - parts_cache.pop(&key); - drop(parts_cache); - self.processed_witnesses.push(key.clone(), ()); - - let encoded_witness = match decode_result { - Ok(encoded_chunk_state_witness) => encoded_chunk_state_witness, - Err(err) => { - // We ideally never expect the decoding to fail. In case it does, we received a bad part - // from the chunk producer. - tracing::error!( - target: "client", - ?err, - shard_id = ?key.shard_id, - height_created = key.height_created, - "Failed to reed solomon decode witness parts. Maybe malicious or corrupt data." - ); - return Err(Error::InvalidPartialChunkStateWitness(format!( - "Failed to reed solomon decode witness parts: {err}", - ))); - } - }; - - let (mut witness, raw_witness_size) = - self.decode_state_witness(&encoded_witness)?; - if witness.chunk_production_key() != key { + // Record the time taken from receiving first part to decoding partial witness. + let time_to_last_part = Instant::now().signed_duration_since(entry.created_at); + metrics::PARTIAL_WITNESS_TIME_TO_LAST_PART + .with_label_values(&[key.shard_id.to_string().as_str()]) + .observe(time_to_last_part.as_seconds_f64()); + + parts_cache.pop(&key); + let total_size = parts_cache.iter().map(|(_, entry)| entry.total_size()).sum(); + drop(parts_cache); + + self.processed_witnesses.push(key.clone(), ()); + + let encoded_witness = match decode_result { + Ok(encoded_chunk_state_witness) => encoded_chunk_state_witness, + Err(err) => { + // We ideally never expect the decoding to fail. In case it does, we received a bad part + // from the chunk producer. + tracing::error!( + target: "client", + ?err, + shard_id = ?key.shard_id, + height_created = key.height_created, + "Failed to reed solomon decode witness parts. Maybe malicious or corrupt data." 
+ ); return Err(Error::InvalidPartialChunkStateWitness(format!( - "Decoded witness key {:?} doesn't match partial witness {:?}", - witness.chunk_production_key(), - key, + "Failed to reed solomon decode witness parts: {err}", ))); } + }; - // Merge accessed contracts into the main transition's partial state. - let PartialState::TrieValues(values) = - &mut witness.main_state_transition.base_state; - values.extend(accessed_contracts.into_iter().map(|code| code.0.into())); - - tracing::debug!(target: "client", ?key, "Sending encoded witness to client."); - self.client_sender.send(ChunkStateWitnessMessage { witness, raw_witness_size }); + let (mut witness, raw_witness_size) = self.decode_state_witness(&encoded_witness)?; + if witness.chunk_production_key() != key { + return Err(Error::InvalidPartialChunkStateWitness(format!( + "Decoded witness key {:?} doesn't match partial witness {:?}", + witness.chunk_production_key(), + key, + ))); } - } - self.record_total_parts_cache_size_metric(); + + // Merge accessed contracts into the main transition's partial state. + let PartialState::TrieValues(values) = &mut witness.main_state_transition.base_state; + values.extend(accessed_contracts.into_iter().map(|code| code.0.into())); + + tracing::debug!(target: "client", ?key, "Sending encoded witness to client."); + self.client_sender.send(ChunkStateWitnessMessage { witness, raw_witness_size }); + + total_size + } else { + parts_cache.iter().map(|(_, entry)| entry.total_size()).sum() + }; + metrics::PARTIAL_WITNESS_CACHE_SIZE.set(total_size as f64); + Ok(()) } @@ -469,12 +474,6 @@ impl PartialEncodedStateWitnessTracker { } } - fn record_total_parts_cache_size_metric(&self) { - let parts_cache = self.parts_cache.lock().unwrap(); - let total_size: usize = parts_cache.iter().map(|(_, entry)| entry.total_size()).sum(); - metrics::PARTIAL_WITNESS_CACHE_SIZE.set(total_size as f64); - } - fn decode_state_witness( &self, encoded_witness: &EncodedChunkStateWitness, From 1e6c4b4a3c95a32a919a87d46d876861e1cf19d1 Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Tue, 7 Jan 2025 15:18:00 +0200 Subject: [PATCH 09/13] wip experiment --- .../partial_witness/mod.rs | 1 + .../partial_witness/partial_witness_actor.rs | 795 +--------------- .../partial_witness_actor_v2.rs | 864 ++++++++++++++++++ 3 files changed, 914 insertions(+), 746 deletions(-) create mode 100644 chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs diff --git a/chain/client/src/stateless_validation/partial_witness/mod.rs b/chain/client/src/stateless_validation/partial_witness/mod.rs index 9861d111eb8..b627a5c7348 100644 --- a/chain/client/src/stateless_validation/partial_witness/mod.rs +++ b/chain/client/src/stateless_validation/partial_witness/mod.rs @@ -1,6 +1,7 @@ mod encoding; mod partial_deploys_tracker; pub mod partial_witness_actor; +pub mod partial_witness_actor_v2; mod partial_witness_tracker; pub use encoding::witness_part_length; diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs index c8ae40851b1..719fa2834a5 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs @@ -1,15 +1,8 @@ -use std::collections::HashSet; -use std::num::NonZeroUsize; -use std::sync::Arc; - -use itertools::Itertools; -use lru::LruCache; -use near_async::futures::{AsyncComputationSpawner, 
AsyncComputationSpawnerExt}; -use near_async::messaging::{Actor, CanSend, Handler, Sender}; +use near_async::futures::AsyncComputationSpawner; +use near_async::messaging::{Actor, Handler, Sender}; use near_async::time::Clock; use near_async::{MultiSend, MultiSenderFrom}; use near_chain::types::RuntimeAdapter; -use near_chain::Error; use near_chain_configs::MutableValidatorSigner; use near_epoch_manager::EpochManagerAdapter; use near_network::state_witness::{ @@ -17,67 +10,21 @@ use near_network::state_witness::{ ContractCodeResponseMessage, PartialEncodedContractDeploysMessage, PartialEncodedStateWitnessForwardMessage, PartialEncodedStateWitnessMessage, }; -use near_network::types::{NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest}; -use near_parameters::RuntimeConfig; +use near_network::types::PeerManagerAdapter; use near_performance_metrics_macros::perf; -use near_primitives::reed_solomon::{ReedSolomonEncoder, ReedSolomonEncoderCache}; -use near_primitives::sharding::ShardChunkHeader; -use near_primitives::stateless_validation::contract_distribution::{ - ChunkContractAccesses, ChunkContractDeploys, CodeBytes, CodeHash, ContractCodeRequest, - ContractCodeResponse, ContractUpdates, MainTransitionKey, PartialEncodedContractDeploys, - PartialEncodedContractDeploysPart, -}; -use near_primitives::stateless_validation::partial_witness::PartialEncodedStateWitness; -use near_primitives::stateless_validation::state_witness::{ - ChunkStateWitness, ChunkStateWitnessAck, EncodedChunkStateWitness, -}; -use near_primitives::stateless_validation::stored_chunk_state_transition_data::StoredChunkStateTransitionData; -use near_primitives::stateless_validation::ChunkProductionKey; -use near_primitives::types::{AccountId, EpochId, ShardId}; -use near_primitives::validator_signer::ValidatorSigner; -use near_store::adapter::trie_store::TrieStoreAdapter; -use near_store::{DBCol, StorageError, TrieDBStorage, TrieStorage}; -use near_vm_runner::{get_contract_cache_key, ContractCode, ContractRuntimeCache}; -use rand::Rng; +use near_primitives::stateless_validation::contract_distribution::ContractUpdates; +use near_primitives::stateless_validation::state_witness::ChunkStateWitness; +use near_primitives::types::ShardId; +use std::sync::Arc; use crate::client_actor::ClientSenderForPartialWitness; -use crate::metrics; -use crate::stateless_validation::state_witness_tracker::ChunkStateWitnessTracker; -use crate::stateless_validation::validate::{ - validate_chunk_contract_accesses, validate_contract_code_request, - validate_partial_encoded_contract_deploys, validate_partial_encoded_state_witness, -}; -use super::encoding::{CONTRACT_DEPLOYS_RATIO_DATA_PARTS, WITNESS_RATIO_DATA_PARTS}; -use super::partial_deploys_tracker::PartialEncodedContractDeploysTracker; -use super::partial_witness_tracker::PartialEncodedStateWitnessTracker; -use near_primitives::utils::compression::CompressedData; - -const PROCESSED_CONTRACT_CODE_REQUESTS_CACHE_SIZE: usize = 30; +use super::partial_witness_actor_v2::{ + PartialWitnessMsg, PartialWitnessSender, PartialWitnessService, +}; pub struct PartialWitnessActor { - /// Adapter to send messages to the network. - network_adapter: PeerManagerAdapter, - /// Validator signer to sign the state witness. This field is mutable and optional. Use with caution! - /// Lock the value of mutable validator signer for the duration of a request to ensure consistency. - /// Please note that the locked value should not be stored anywhere or passed through the thread boundary. 
- my_signer: MutableValidatorSigner, - epoch_manager: Arc, - runtime: Arc, - /// Tracks the parts of the state witness sent from chunk producers to chunk validators. - partial_witness_tracker: Arc, - partial_deploys_tracker: PartialEncodedContractDeploysTracker, - /// Tracks a collection of state witnesses sent from chunk producers to chunk validators. - state_witness_tracker: ChunkStateWitnessTracker, - /// Reed Solomon encoder for encoding state witness parts. - /// We keep one wrapper for each length of chunk_validators to avoid re-creating the encoder. - witness_encoders: ReedSolomonEncoderCache, - /// Same as above for contract deploys. - contract_deploys_encoders: ReedSolomonEncoderCache, - compile_contracts_spawner: Arc, - partial_witness_spawner: Arc, - /// AccountId in the key corresponds to the requester (chunk validator). - processed_contract_code_requests: LruCache<(ChunkProductionKey, AccountId), ()>, + tx: PartialWitnessSender, } impl Actor for PartialWitnessActor {} @@ -98,63 +45,75 @@ pub struct PartialWitnessSenderForClient { impl Handler for PartialWitnessActor { #[perf] fn handle(&mut self, msg: DistributeStateWitnessRequest) { - if let Err(err) = self.handle_distribute_state_witness_request(msg) { - tracing::error!(target: "client", ?err, "Failed to handle distribute chunk state witness request"); - } + let tx = self.tx.clone(); + actix::spawn(async move { + tx.send(PartialWitnessMsg::DistributeStateWitnessRequest(Box::new(msg))).await.unwrap(); + }); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: ChunkStateWitnessAckMessage) { - self.handle_chunk_state_witness_ack(msg.0); + let tx = self.tx.clone(); + actix::spawn(async move { + tx.send(PartialWitnessMsg::ChunkStateWitnessAckMessage(msg)).await.unwrap(); + }); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: PartialEncodedStateWitnessMessage) { - if let Err(err) = self.handle_partial_encoded_state_witness(msg.0) { - tracing::error!(target: "client", ?err, "Failed to handle PartialEncodedStateWitnessMessage"); - } + let tx = self.tx.clone(); + actix::spawn(async move { + tx.send(PartialWitnessMsg::PartialEncodedStateWitnessMessage(msg)).await.unwrap(); + }); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: PartialEncodedStateWitnessForwardMessage) { - if let Err(err) = self.handle_partial_encoded_state_witness_forward(msg.0) { - tracing::error!(target: "client", ?err, "Failed to handle PartialEncodedStateWitnessForwardMessage"); - } + let tx = self.tx.clone(); + actix::spawn(async move { + tx.send(PartialWitnessMsg::PartialEncodedStateWitnessForwardMessage(msg)) + .await + .unwrap(); + }); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: ChunkContractAccessesMessage) { - if let Err(err) = self.handle_chunk_contract_accesses(msg.0) { - tracing::error!(target: "client", ?err, "Failed to handle ChunkContractAccessesMessage"); - } + let tx = self.tx.clone(); + actix::spawn(async move { + tx.send(PartialWitnessMsg::ChunkContractAccessesMessage(msg)).await.unwrap(); + }); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: PartialEncodedContractDeploysMessage) { - if let Err(err) = self.handle_partial_encoded_contract_deploys(msg.0) { - tracing::error!(target: "client", ?err, "Failed to handle PartialEncodedContractDeploysMessage"); - } + let tx = self.tx.clone(); + actix::spawn(async move { + tx.send(PartialWitnessMsg::PartialEncodedContractDeploysMessage(msg)).await.unwrap(); + }); } } impl Handler for PartialWitnessActor { fn 
handle(&mut self, msg: ContractCodeRequestMessage) { - if let Err(err) = self.handle_contract_code_request(msg.0) { - tracing::error!(target: "client", ?err, "Failed to handle ContractCodeRequestMessage"); - } + let tx = self.tx.clone(); + actix::spawn(async move { + tx.send(PartialWitnessMsg::ContractCodeRequestMessage(msg)).await.unwrap(); + }); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: ContractCodeResponseMessage) { - if let Err(err) = self.handle_contract_code_response(msg.0) { - tracing::error!(target: "client", ?err, "Failed to handle ContractCodeResponseMessage"); - } + let tx = self.tx.clone(); + actix::spawn(async move { + tx.send(PartialWitnessMsg::ContractCodeResponseMessage(msg)).await.unwrap(); + }); } } @@ -169,672 +128,16 @@ impl PartialWitnessActor { compile_contracts_spawner: Arc, partial_witness_spawner: Arc, ) -> Self { - let partial_witness_tracker = - Arc::new(PartialEncodedStateWitnessTracker::new(client_sender, epoch_manager.clone())); - Self { + let tx = PartialWitnessService::new( + clock, network_adapter, + client_sender, my_signer, epoch_manager, - partial_witness_tracker, - partial_deploys_tracker: PartialEncodedContractDeploysTracker::new(), - state_witness_tracker: ChunkStateWitnessTracker::new(clock), runtime, - witness_encoders: ReedSolomonEncoderCache::new(WITNESS_RATIO_DATA_PARTS), - contract_deploys_encoders: ReedSolomonEncoderCache::new( - CONTRACT_DEPLOYS_RATIO_DATA_PARTS, - ), compile_contracts_spawner, partial_witness_spawner, - processed_contract_code_requests: LruCache::new( - NonZeroUsize::new(PROCESSED_CONTRACT_CODE_REQUESTS_CACHE_SIZE).unwrap(), - ), - } - } - - fn handle_distribute_state_witness_request( - &mut self, - msg: DistributeStateWitnessRequest, - ) -> Result<(), Error> { - let DistributeStateWitnessRequest { - state_witness, - contract_updates: ContractUpdates { contract_accesses, contract_deploys }, - main_transition_shard_id, - } = msg; - - tracing::debug!( - target: "client", - chunk_hash=?state_witness.chunk_header.chunk_hash(), - "distribute_chunk_state_witness", - ); - - // We send the state-witness and contract-updates in the following order: - // 1. We send the hashes of the contract code accessed (if contract code is excluded from witness and any contracts are called) - // before the state witness in order to allow validators to check and request missing contract code, while waiting for witness parts. - // 2. We send the state witness parts to witness-part owners. - // 3. We send the contract deploys parts to other validators (that do not validate the witness in this turn). This is lower priority - // since the newly-deployed contracts will be needed by other validators in later turns. 
- - let signer = self.my_validator_signer()?; - let key = state_witness.chunk_production_key(); - let chunk_validators = self - .epoch_manager - .get_chunk_validator_assignments(&key.epoch_id, key.shard_id, key.height_created) - .expect("Chunk validators must be defined") - .ordered_chunk_validators(); - - if !contract_accesses.is_empty() { - self.send_contract_accesses_to_chunk_validators( - key.clone(), - contract_accesses, - MainTransitionKey { - block_hash: state_witness.main_state_transition.block_hash, - shard_id: main_transition_shard_id, - }, - &chunk_validators, - &signer, - ); - } - - let witness_bytes = compress_witness(&state_witness)?; - self.send_state_witness_parts( - key.epoch_id, - &state_witness.chunk_header, - witness_bytes, - &chunk_validators, - &signer, - ); - - if !contract_deploys.is_empty() { - self.send_chunk_contract_deploys_parts(key, contract_deploys)?; - } - Ok(()) - } - - // Function to generate the parts of the state witness and return them as a tuple of chunk_validator and part. - fn generate_state_witness_parts( - &mut self, - epoch_id: EpochId, - chunk_header: &ShardChunkHeader, - witness_bytes: EncodedChunkStateWitness, - chunk_validators: &[AccountId], - signer: &ValidatorSigner, - ) -> Vec<(AccountId, PartialEncodedStateWitness)> { - tracing::debug!( - target: "client", - chunk_hash=?chunk_header.chunk_hash(), - ?chunk_validators, - "generate_state_witness_parts", - ); - - // Break the state witness into parts using Reed Solomon encoding. - let encoder = self.witness_encoders.entry(chunk_validators.len()); - let (parts, encoded_length) = encoder.encode(&witness_bytes); - - chunk_validators - .iter() - .zip_eq(parts) - .enumerate() - .map(|(part_ord, (chunk_validator, part))| { - // It's fine to unwrap part here as we just constructed the parts above and we expect - // all of them to be present. - let partial_witness = PartialEncodedStateWitness::new( - epoch_id, - chunk_header.clone(), - part_ord, - part.unwrap().to_vec(), - encoded_length, - signer, - ); - (chunk_validator.clone(), partial_witness) - }) - .collect_vec() - } - - fn generate_contract_deploys_parts( - &mut self, - key: &ChunkProductionKey, - deploys: ChunkContractDeploys, - ) -> Result, Error> { - let validators = self.ordered_contract_deploys_validators(key)?; - // Note that target validators do not include the chunk producers, and thus in some case - // (eg. tests or small networks) there may be no other validators to send the new contracts to. - if validators.is_empty() { - return Ok(vec![]); - } - - let encoder = self.contract_deploys_encoder(validators.len()); - let (parts, encoded_length) = encoder.encode(&deploys); - let signer = self.my_validator_signer()?; - - Ok(validators - .into_iter() - .zip_eq(parts) - .enumerate() - .map(|(part_ord, (validator, part))| { - let partial_deploys = PartialEncodedContractDeploys::new( - key.clone(), - PartialEncodedContractDeploysPart { - part_ord, - data: part.unwrap().to_vec().into_boxed_slice(), - encoded_length, - }, - &signer, - ); - (validator, partial_deploys) - }) - .collect_vec()) - } - - // Break the state witness into parts and send each part to the corresponding chunk validator owner. - // The chunk validator owner will then forward the part to all other chunk validators. - // Each chunk validator would collect the parts and reconstruct the state witness. 
- fn send_state_witness_parts( - &mut self, - epoch_id: EpochId, - chunk_header: &ShardChunkHeader, - witness_bytes: EncodedChunkStateWitness, - chunk_validators: &[AccountId], - signer: &ValidatorSigner, - ) { - // Capture these values first, as the sources are consumed before calling record_witness_sent. - let chunk_hash = chunk_header.chunk_hash(); - let witness_size_in_bytes = witness_bytes.size_bytes(); - - // Record time taken to encode the state witness parts. - let shard_id_label = chunk_header.shard_id().to_string(); - let encode_timer = metrics::PARTIAL_WITNESS_ENCODE_TIME - .with_label_values(&[shard_id_label.as_str()]) - .start_timer(); - let validator_witness_tuple = self.generate_state_witness_parts( - epoch_id, - chunk_header, - witness_bytes, - chunk_validators, - signer, - ); - encode_timer.observe_duration(); - - // Record the witness in order to match the incoming acks for measuring round-trip times. - // See process_chunk_state_witness_ack for the handling of the ack messages. - self.state_witness_tracker.record_witness_sent( - chunk_hash, - witness_size_in_bytes, - validator_witness_tuple.len(), - ); - - // Send the parts to the corresponding chunk validator owners. - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::PartialEncodedStateWitness(validator_witness_tuple), - )); - } - - /// Function to handle receiving partial_encoded_state_witness message from chunk producer. - fn handle_partial_encoded_state_witness( - &mut self, - partial_witness: PartialEncodedStateWitness, - ) -> Result<(), Error> { - tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessMessage"); - let signer = self.my_validator_signer()?; - let epoch_manager = self.epoch_manager.clone(); - let runtime_adapter = self.runtime.clone(); - - let ChunkProductionKey { shard_id, epoch_id, height_created } = - partial_witness.chunk_production_key(); - - let chunk_producer = self - .epoch_manager - .get_chunk_producer_info(&ChunkProductionKey { epoch_id, height_created, shard_id })? - .take_account_id(); - - // Forward witness part to chunk validators except the validator that produced the chunk and witness. - let target_chunk_validators = self - .epoch_manager - .get_chunk_validator_assignments(&epoch_id, shard_id, height_created)? - .ordered_chunk_validators() - .into_iter() - .filter(|validator| validator != &chunk_producer) - .collect(); - - let network_adapter = self.network_adapter.clone(); - - self.partial_witness_spawner.spawn("handle_partial_encoded_state_witness", move || { - // Validate the partial encoded state witness and forward the part to all the chunk validators. - match validate_partial_encoded_state_witness( - epoch_manager.as_ref(), - &partial_witness, - &signer, - runtime_adapter.store(), - ) { - Ok(true) => { - network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::PartialEncodedStateWitnessForward( - target_chunk_validators, - partial_witness, - ), - )); - } - Ok(false) => { - tracing::warn!( - target: "client", - "Received invalid partial encoded state witness" - ); - } - Err(err) => { - tracing::warn!( - target: "client", - "Encountered error during validation: {}", - err - ); - } - } - }); - - Ok(()) - } - - /// Function to handle receiving partial_encoded_state_witness_forward message from chunk producer. 
- fn handle_partial_encoded_state_witness_forward( - &mut self, - partial_witness: PartialEncodedStateWitness, - ) -> Result<(), Error> { - tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessForwardMessage"); - - let signer = self.my_validator_signer()?; - let partial_witness_tracker = self.partial_witness_tracker.clone(); - let epoch_manager = self.epoch_manager.clone(); - let runtime_adapter = self.runtime.clone(); - self.partial_witness_spawner.spawn( - "handle_partial_encoded_state_witness_forward", - move || { - // Validate the partial encoded state witness and store the partial encoded state witness. - match validate_partial_encoded_state_witness( - epoch_manager.as_ref(), - &partial_witness, - &signer, - runtime_adapter.store(), - ) { - Ok(true) => { - if let Err(err) = partial_witness_tracker.store_partial_encoded_state_witness(partial_witness) { - tracing::error!(target: "client", "Failed to store partial encoded state witness: {}", err); - } - } - Ok(false) => { - tracing::warn!( - target: "client", - "Received invalid partial encoded state witness" - ); - } - Err(err) => { - tracing::warn!( - target: "client", - "Encountered error during validation: {}", - err - ); - } - } - }, - ); - - Ok(()) - } - - /// Handles partial contract deploy message received from a peer. - /// - /// This message may belong to one of two steps of distributing contract code. In the first step the code is compressed - /// and encoded into parts using Reed Solomon encoding and each part is sent to one of the validators (part owner). - /// See `send_chunk_contract_deploys_parts` for the code implementing this. In the second step each validator (part-owner) - /// forwards the part it receives to other validators. - fn handle_partial_encoded_contract_deploys( - &mut self, - partial_deploys: PartialEncodedContractDeploys, - ) -> Result<(), Error> { - tracing::debug!(target: "client", ?partial_deploys, "Receive PartialEncodedContractDeploys"); - if !validate_partial_encoded_contract_deploys( - self.epoch_manager.as_ref(), - &partial_deploys, - self.runtime.store(), - )? { - return Ok(()); - } - if self.partial_deploys_tracker.already_processed(&partial_deploys) { - return Ok(()); - } - let key = partial_deploys.chunk_production_key().clone(); - let validators = self.ordered_contract_deploys_validators(&key)?; - if validators.is_empty() { - // Note that target validators do not include the chunk producers, and thus in some case - // (eg. tests or small networks) there may be no other validators to send the new contracts to. - // In such case, the message we are handling here should not be sent in the first place, - // unless there is a bug or adversarial behavior that sends the message. 
- debug_assert!(false, "No target validators, we must not receive this message"); - return Ok(()); - } - - // Forward to other validators if the part received is my part - let signer = self.my_validator_signer()?; - let my_account_id = signer.validator_id(); - let Some(my_part_ord) = validators.iter().position(|validator| validator == my_account_id) - else { - tracing::warn!( - target: "client", - ?key, - "Validator is not a part of contract deploys distribution" - ); - return Ok(()); - }; - if partial_deploys.part().part_ord == my_part_ord { - let other_validators = validators - .iter() - .filter(|&validator| validator != my_account_id) - .cloned() - .collect_vec(); - if !other_validators.is_empty() { - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::PartialEncodedContractDeploys( - other_validators, - partial_deploys.clone(), - ), - )); - } - } - - // Store part - let encoder = self.contract_deploys_encoder(validators.len()); - if let Some(deploys) = self - .partial_deploys_tracker - .store_partial_encoded_contract_deploys(partial_deploys, encoder)? - { - let contracts = match deploys.decompress_contracts() { - Ok(contracts) => contracts, - Err(err) => { - tracing::warn!( - target: "client", - ?err, - ?key, - "Failed to decompress deployed contracts." - ); - return Ok(()); - } - }; - let contract_codes = contracts.into_iter().map(|contract| contract.into()).collect(); - let runtime = self.runtime.clone(); - self.compile_contracts_spawner.spawn("precompile_deployed_contracts", move || { - if let Err(err) = runtime.precompile_contracts(&key.epoch_id, contract_codes) { - tracing::error!( - target: "client", - ?err, - ?key, - "Failed to precompile deployed contracts." - ); - } - }); - } - - Ok(()) - } - - /// Handles the state witness ack message from the chunk validator. - /// It computes the round-trip time between sending the state witness and receiving - /// the ack message and updates the corresponding metric with it. - /// Currently we do not raise an error for handling of witness-ack messages, - /// as it is used only for tracking some networking metrics. - fn handle_chunk_state_witness_ack(&mut self, witness_ack: ChunkStateWitnessAck) { - self.state_witness_tracker.on_witness_ack_received(witness_ack); - } - - /// Handles contract code accesses message from chunk producer. - /// This is sent in parallel to a chunk state witness and contains the hashes - /// of the contract code accessed when applying the previous chunk of the witness. - fn handle_chunk_contract_accesses( - &mut self, - accesses: ChunkContractAccesses, - ) -> Result<(), Error> { - let signer = self.my_validator_signer()?; - if !validate_chunk_contract_accesses( - self.epoch_manager.as_ref(), - &accesses, - &signer, - self.runtime.store(), - )? 
{ - return Ok(()); - } - let key = accesses.chunk_production_key(); - let contracts_cache = self.runtime.compiled_contract_cache(); - let runtime_config = self - .runtime - .get_runtime_config(self.epoch_manager.get_epoch_protocol_version(&key.epoch_id)?)?; - let missing_contract_hashes = HashSet::from_iter( - accesses - .contracts() - .iter() - .filter(|&hash| { - !contracts_cache_contains_contract(contracts_cache, hash, &runtime_config) - }) - .cloned(), - ); - if missing_contract_hashes.is_empty() { - return Ok(()); - } - self.partial_witness_tracker - .store_accessed_contract_hashes(key.clone(), missing_contract_hashes.clone())?; - let random_chunk_producer = { - let mut chunk_producers = self - .epoch_manager - .get_epoch_chunk_producers_for_shard(&key.epoch_id, key.shard_id)?; - chunk_producers.swap_remove(rand::thread_rng().gen_range(0..chunk_producers.len())) - }; - let request = ContractCodeRequest::new( - key.clone(), - missing_contract_hashes, - accesses.main_transition().clone(), - &signer, - ); - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::ContractCodeRequest(random_chunk_producer, request), - )); - Ok(()) - } - - /// Sends the contract accesses to the same chunk validators - /// (except for the chunk producers that track the same shard), - /// which will receive the state witness for the new chunk. - fn send_contract_accesses_to_chunk_validators( - &self, - key: ChunkProductionKey, - contract_accesses: HashSet, - main_transition: MainTransitionKey, - chunk_validators: &[AccountId], - my_signer: &ValidatorSigner, - ) { - let chunk_producers: HashSet = self - .epoch_manager - .get_epoch_chunk_producers_for_shard(&key.epoch_id, key.shard_id) - .expect("Chunk producers must be defined") - .into_iter() - .collect(); - - // Exclude chunk producers that track the same shard from the target list, since they track the state that contains the respective code. - let target_chunk_validators = chunk_validators - .iter() - .filter(|validator| !chunk_producers.contains(*validator)) - .cloned() - .collect(); - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::ChunkContractAccesses( - target_chunk_validators, - ChunkContractAccesses::new(key, contract_accesses, main_transition, my_signer), - ), - )); - } - - /// Retrieves the code for the given contract hashes and distributes them to validator in parts. - /// - /// This implements the first step of distributing contract code to validators where the contract codes - /// are compressed and encoded into parts using Reed Solomon encoding, and then each part is sent to - /// one of the validators (part-owner). Second step of the distribution, where each validator (part-owner) - /// forwards the part it receives is implemented in `handle_partial_encoded_contract_deploys`. - fn send_chunk_contract_deploys_parts( - &mut self, - key: ChunkProductionKey, - contract_codes: Vec, - ) -> Result<(), Error> { - let contracts = contract_codes.into_iter().map(|contract| contract.into()).collect(); - let compressed_deploys = ChunkContractDeploys::compress_contracts(&contracts)?; - let validator_parts = self.generate_contract_deploys_parts(&key, compressed_deploys)?; - for (part_owner, deploys_part) in validator_parts.into_iter() { - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::PartialEncodedContractDeploys(vec![part_owner], deploys_part), - )); - } - Ok(()) - } - - /// Handles contract code requests message from chunk validators. 
- /// As response to this message, sends the contract code requested to - /// the requesting chunk validator for the given hashes of the contract code. - fn handle_contract_code_request(&mut self, request: ContractCodeRequest) -> Result<(), Error> { - if !validate_contract_code_request( - self.epoch_manager.as_ref(), - &request, - self.runtime.store(), - )? { - return Ok(()); - } - - let key = request.chunk_production_key(); - let processed_requests_key = (key.clone(), request.requester().clone()); - if self.processed_contract_code_requests.contains(&processed_requests_key) { - tracing::warn!( - target: "client", - ?processed_requests_key, - "Contract code request from this account was already processed" - ); - return Ok(()); - } - self.processed_contract_code_requests.push(processed_requests_key, ()); - - let _timer = near_chain::stateless_validation::metrics::PROCESS_CONTRACT_CODE_REQUEST_TIME - .with_label_values(&[&key.shard_id.to_string()]) - .start_timer(); - - let main_transition_key = request.main_transition(); - let Some(transition_data) = - self.runtime.store().get_ser::( - DBCol::StateTransitionData, - &near_primitives::utils::get_block_shard_id( - &main_transition_key.block_hash, - main_transition_key.shard_id, - ), - )? - else { - tracing::warn!( - target: "client", - ?key, - ?main_transition_key, - "Missing state transition data" - ); - return Ok(()); - }; - let valid_accesses: HashSet = - transition_data.contract_accesses().iter().cloned().collect(); - - let storage = TrieDBStorage::new( - TrieStoreAdapter::new(self.runtime.store().clone()), - self.epoch_manager.shard_id_to_uid( - main_transition_key.shard_id, - &self.epoch_manager.get_epoch_id(&main_transition_key.block_hash)?, - )?, ); - let mut contracts = Vec::new(); - for contract_hash in request.contracts() { - if !valid_accesses.contains(contract_hash) { - tracing::warn!( - target: "client", - ?key, - ?contract_hash, - "Requested contract code was not accessed when applying the chunk" - ); - return Ok(()); - } - match storage.retrieve_raw_bytes(&contract_hash.0) { - Ok(bytes) => contracts.push(CodeBytes(bytes)), - Err(StorageError::MissingTrieValue(_, _)) => { - tracing::warn!( - target: "client", - ?contract_hash, - chunk_production_key = ?key, - "Requested contract hash is not present in the storage" - ); - return Ok(()); - } - Err(err) => return Err(err.into()), - } - } - let response = ContractCodeResponse::encode(key.clone(), &contracts)?; - self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( - NetworkRequests::ContractCodeResponse(request.requester().clone(), response), - )); - Ok(()) + Self { tx } } - - /// Handles contract code responses message from chunk producer. 
- fn handle_contract_code_response( - &mut self, - response: ContractCodeResponse, - ) -> Result<(), Error> { - let key = response.chunk_production_key().clone(); - let contracts = response.decompress_contracts()?; - self.partial_witness_tracker.store_accessed_contract_codes(key, contracts) - } - - fn my_validator_signer(&self) -> Result, Error> { - self.my_signer.get().ok_or_else(|| Error::NotAValidator("not a validator".to_owned())) - } - - fn contract_deploys_encoder(&mut self, validators_count: usize) -> Arc { - self.contract_deploys_encoders.entry(validators_count) - } - - fn ordered_contract_deploys_validators( - &mut self, - key: &ChunkProductionKey, - ) -> Result, Error> { - let chunk_producers = HashSet::::from_iter( - self.epoch_manager.get_epoch_chunk_producers_for_shard(&key.epoch_id, key.shard_id)?, - ); - let mut validators = self - .epoch_manager - .get_epoch_all_validators(&key.epoch_id)? - .into_iter() - .filter(|stake| !chunk_producers.contains(stake.account_id())) - .map(|stake| stake.account_id().clone()) - .collect::>(); - validators.sort(); - Ok(validators) - } -} - -fn compress_witness(witness: &ChunkStateWitness) -> Result { - let shard_id_label = witness.chunk_header.shard_id().to_string(); - let encode_timer = near_chain::stateless_validation::metrics::CHUNK_STATE_WITNESS_ENCODE_TIME - .with_label_values(&[shard_id_label.as_str()]) - .start_timer(); - let (witness_bytes, raw_witness_size) = EncodedChunkStateWitness::encode(witness)?; - encode_timer.observe_duration(); - - near_chain::stateless_validation::metrics::record_witness_size_metrics( - raw_witness_size, - witness_bytes.size_bytes(), - witness, - ); - Ok(witness_bytes) -} - -fn contracts_cache_contains_contract( - cache: &dyn ContractRuntimeCache, - contract_hash: &CodeHash, - runtime_config: &RuntimeConfig, -) -> bool { - let cache_key = get_contract_cache_key(contract_hash.0, &runtime_config.wasm_config); - cache.memory_cache().contains(cache_key) || cache.has(&cache_key).is_ok_and(|has| has) } diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs new file mode 100644 index 00000000000..ff3e326e505 --- /dev/null +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs @@ -0,0 +1,864 @@ +use std::collections::HashSet; +use std::num::NonZeroUsize; +use std::sync::Arc; + +use itertools::Itertools; +use lru::LruCache; +use near_async::futures::{AsyncComputationSpawner, AsyncComputationSpawnerExt}; +use near_async::messaging::{CanSend, Sender}; +use near_async::time::Clock; +use near_async::{MultiSend, MultiSenderFrom}; +use near_chain::types::RuntimeAdapter; +use near_chain::Error; +use near_chain_configs::MutableValidatorSigner; +use near_epoch_manager::EpochManagerAdapter; +use near_network::state_witness::{ + ChunkContractAccessesMessage, ChunkStateWitnessAckMessage, ContractCodeRequestMessage, + ContractCodeResponseMessage, PartialEncodedContractDeploysMessage, + PartialEncodedStateWitnessForwardMessage, PartialEncodedStateWitnessMessage, +}; +use near_network::types::{NetworkRequests, PeerManagerAdapter, PeerManagerMessageRequest}; +use near_parameters::RuntimeConfig; +use near_primitives::reed_solomon::{ReedSolomonEncoder, ReedSolomonEncoderCache}; +use near_primitives::sharding::ShardChunkHeader; +use near_primitives::stateless_validation::contract_distribution::{ + ChunkContractAccesses, ChunkContractDeploys, CodeBytes, CodeHash, 
ContractCodeRequest, + ContractCodeResponse, ContractUpdates, MainTransitionKey, PartialEncodedContractDeploys, + PartialEncodedContractDeploysPart, +}; +use near_primitives::stateless_validation::partial_witness::PartialEncodedStateWitness; +use near_primitives::stateless_validation::state_witness::{ + ChunkStateWitness, ChunkStateWitnessAck, EncodedChunkStateWitness, +}; +use near_primitives::stateless_validation::stored_chunk_state_transition_data::StoredChunkStateTransitionData; +use near_primitives::stateless_validation::ChunkProductionKey; +use near_primitives::types::{AccountId, EpochId}; +use near_primitives::validator_signer::ValidatorSigner; +use near_store::adapter::trie_store::TrieStoreAdapter; +use near_store::{DBCol, StorageError, TrieDBStorage, TrieStorage}; +use near_vm_runner::{get_contract_cache_key, ContractCode, ContractRuntimeCache}; +use rand::Rng; + +use crate::client_actor::ClientSenderForPartialWitness; +use crate::stateless_validation::state_witness_tracker::ChunkStateWitnessTracker; +use crate::stateless_validation::validate::{ + validate_chunk_contract_accesses, validate_contract_code_request, + validate_partial_encoded_contract_deploys, validate_partial_encoded_state_witness, +}; +use crate::{metrics, DistributeStateWitnessRequest}; + +use super::encoding::{CONTRACT_DEPLOYS_RATIO_DATA_PARTS, WITNESS_RATIO_DATA_PARTS}; +use super::partial_deploys_tracker::PartialEncodedContractDeploysTracker; +use super::partial_witness_tracker::PartialEncodedStateWitnessTracker; +use near_primitives::utils::compression::CompressedData; +use tokio::sync::mpsc::{self, error::SendError, Receiver, Sender as MpscSender}; + +const PROCESSED_CONTRACT_CODE_REQUESTS_CACHE_SIZE: usize = 30; + +#[derive(Debug)] +pub enum PartialWitnessMsg { + DistributeStateWitnessRequest(Box), + ChunkStateWitnessAckMessage(ChunkStateWitnessAckMessage), + PartialEncodedStateWitnessMessage(PartialEncodedStateWitnessMessage), + PartialEncodedStateWitnessForwardMessage(PartialEncodedStateWitnessForwardMessage), + ChunkContractAccessesMessage(ChunkContractAccessesMessage), + PartialEncodedContractDeploysMessage(PartialEncodedContractDeploysMessage), + ContractCodeRequestMessage(ContractCodeRequestMessage), + ContractCodeResponseMessage(ContractCodeResponseMessage), +} + +#[derive(Clone)] +pub struct PartialWitnessSender(MpscSender); + +impl PartialWitnessSender { + /// Send a message to the Partial Witness Service (async). + pub async fn send(&self, msg: PartialWitnessMsg) -> Result<(), SendError> { + self.0.send(msg).await + } +} + +pub struct PartialWitnessService { + rx: Receiver, + /// Adapter to send messages to the network. + network_adapter: PeerManagerAdapter, + /// Validator signer to sign the state witness. This field is mutable and optional. Use with caution! + /// Lock the value of mutable validator signer for the duration of a request to ensure consistency. + /// Please note that the locked value should not be stored anywhere or passed through the thread boundary. + my_signer: MutableValidatorSigner, + epoch_manager: Arc, + runtime: Arc, + /// Tracks the parts of the state witness sent from chunk producers to chunk validators. + partial_witness_tracker: Arc, + partial_deploys_tracker: PartialEncodedContractDeploysTracker, + /// Tracks a collection of state witnesses sent from chunk producers to chunk validators. + state_witness_tracker: ChunkStateWitnessTracker, + /// Reed Solomon encoder for encoding state witness parts. 
+ /// We keep one wrapper for each length of chunk_validators to avoid re-creating the encoder. + witness_encoders: ReedSolomonEncoderCache, + /// Same as above for contract deploys. + contract_deploys_encoders: ReedSolomonEncoderCache, + compile_contracts_spawner: Arc, + partial_witness_spawner: Arc, + /// AccountId in the key corresponds to the requester (chunk validator). + processed_contract_code_requests: LruCache<(ChunkProductionKey, AccountId), ()>, +} + +#[derive(Clone, MultiSend, MultiSenderFrom)] +pub struct PartialWitnessSenderForClient { + pub distribute_chunk_state_witness: Sender, +} + +impl PartialWitnessService { + pub fn new( + clock: Clock, + network_adapter: PeerManagerAdapter, + client_sender: ClientSenderForPartialWitness, + my_signer: MutableValidatorSigner, + epoch_manager: Arc, + runtime: Arc, + compile_contracts_spawner: Arc, + partial_witness_spawner: Arc, + ) -> PartialWitnessSender { + let (tx, rx) = mpsc::channel(1024); + + let partial_witness_tracker = + Arc::new(PartialEncodedStateWitnessTracker::new(client_sender, epoch_manager.clone())); + + let actor = Self { + rx, + network_adapter, + my_signer, + epoch_manager, + runtime, + partial_witness_tracker, + partial_deploys_tracker: PartialEncodedContractDeploysTracker::new(), + state_witness_tracker: ChunkStateWitnessTracker::new(clock), + witness_encoders: ReedSolomonEncoderCache::new(WITNESS_RATIO_DATA_PARTS), + contract_deploys_encoders: ReedSolomonEncoderCache::new( + CONTRACT_DEPLOYS_RATIO_DATA_PARTS, + ), + compile_contracts_spawner, + partial_witness_spawner, + processed_contract_code_requests: LruCache::new( + NonZeroUsize::new(PROCESSED_CONTRACT_CODE_REQUESTS_CACHE_SIZE).unwrap(), + ), + }; + + tokio::spawn(async move { + actor.run().await.expect("Failed to run PartialWitnessActor"); + }); + + let sender = PartialWitnessSender(tx); + sender + } + + /// Main async loop processing all incoming PartialWitnessMsg. 
+ pub async fn run(mut self) -> Result<(), Error> { + while let Some(msg) = self.rx.recv().await { + // Match on the enum variant and dispatch the appropriate handler: + match msg { + PartialWitnessMsg::DistributeStateWitnessRequest(req) => { + if let Err(err) = self.handle_distribute_state_witness_request(req).await { + tracing::error!(target: "client", ?err, "Failed to handle distribute chunk state witness request"); + } + } + + PartialWitnessMsg::ChunkStateWitnessAckMessage(msg) => { + self.handle_chunk_state_witness_ack(msg.0).await; + } + + PartialWitnessMsg::PartialEncodedStateWitnessMessage(msg) => { + if let Err(err) = self.handle_partial_encoded_state_witness(msg.0).await { + tracing::error!(target: "client", ?err, "Failed to handle PartialEncodedStateWitnessMessage"); + } + } + + PartialWitnessMsg::PartialEncodedStateWitnessForwardMessage(msg) => { + if let Err(err) = self.handle_partial_encoded_state_witness_forward(msg.0).await + { + tracing::error!(target: "client", ?err, "Failed to handle PartialEncodedStateWitnessForwardMessage"); + } + } + + PartialWitnessMsg::ChunkContractAccessesMessage(msg) => { + if let Err(err) = self.handle_chunk_contract_accesses(msg.0).await { + tracing::error!(target: "client", ?err, "Failed to handle ChunkContractAccessesMessage"); + } + } + + PartialWitnessMsg::PartialEncodedContractDeploysMessage(msg) => { + if let Err(err) = self.handle_partial_encoded_contract_deploys(msg.0).await { + tracing::error!(target: "client", ?err, "Failed to handle PartialEncodedContractDeploysMessage"); + } + } + + PartialWitnessMsg::ContractCodeRequestMessage(msg) => { + if let Err(err) = self.handle_contract_code_request(msg.0).await { + tracing::error!(target: "client", ?err, "Failed to handle ContractCodeRequestMessage"); + } + } + + PartialWitnessMsg::ContractCodeResponseMessage(msg) => { + if let Err(err) = self.handle_contract_code_response(msg.0).await { + tracing::error!(target: "client", ?err, "Failed to handle ContractCodeResponseMessage"); + } + } + } + } + + Ok(()) + } + + async fn handle_distribute_state_witness_request( + &mut self, + msg: Box, + ) -> Result<(), Error> { + let DistributeStateWitnessRequest { + state_witness, + contract_updates: ContractUpdates { contract_accesses, contract_deploys }, + main_transition_shard_id, + } = *msg; + + tracing::debug!( + target: "client", + chunk_hash=?state_witness.chunk_header.chunk_hash(), + "distribute_chunk_state_witness", + ); + + // We send the state-witness and contract-updates in the following order: + // 1. We send the hashes of the contract code accessed (if contract code is excluded from witness and any contracts are called) + // before the state witness in order to allow validators to check and request missing contract code, while waiting for witness parts. + // 2. We send the state witness parts to witness-part owners. + // 3. We send the contract deploys parts to other validators (that do not validate the witness in this turn). This is lower priority + // since the newly-deployed contracts will be needed by other validators in later turns. 
+ + let signer = self.my_validator_signer()?; + let key = state_witness.chunk_production_key(); + let chunk_validators = self + .epoch_manager + .get_chunk_validator_assignments(&key.epoch_id, key.shard_id, key.height_created) + .expect("Chunk validators must be defined") + .ordered_chunk_validators(); + + if !contract_accesses.is_empty() { + self.send_contract_accesses_to_chunk_validators( + key.clone(), + contract_accesses, + MainTransitionKey { + block_hash: state_witness.main_state_transition.block_hash, + shard_id: main_transition_shard_id, + }, + &chunk_validators, + &signer, + ); + } + + let witness_bytes = compress_witness(&state_witness)?; + self.send_state_witness_parts( + key.epoch_id, + &state_witness.chunk_header, + witness_bytes, + &chunk_validators, + &signer, + ); + + if !contract_deploys.is_empty() { + self.send_chunk_contract_deploys_parts(key, contract_deploys)?; + } + Ok(()) + } + + // Function to generate the parts of the state witness and return them as a tuple of chunk_validator and part. + fn generate_state_witness_parts( + &mut self, + epoch_id: EpochId, + chunk_header: &ShardChunkHeader, + witness_bytes: EncodedChunkStateWitness, + chunk_validators: &[AccountId], + signer: &ValidatorSigner, + ) -> Vec<(AccountId, PartialEncodedStateWitness)> { + tracing::debug!( + target: "client", + chunk_hash=?chunk_header.chunk_hash(), + ?chunk_validators, + "generate_state_witness_parts", + ); + + // Break the state witness into parts using Reed Solomon encoding. + let encoder = self.witness_encoders.entry(chunk_validators.len()); + let (parts, encoded_length) = encoder.encode(&witness_bytes); + + chunk_validators + .iter() + .zip_eq(parts) + .enumerate() + .map(|(part_ord, (chunk_validator, part))| { + // It's fine to unwrap part here as we just constructed the parts above and we expect + // all of them to be present. + let partial_witness = PartialEncodedStateWitness::new( + epoch_id, + chunk_header.clone(), + part_ord, + part.unwrap().to_vec(), + encoded_length, + signer, + ); + (chunk_validator.clone(), partial_witness) + }) + .collect_vec() + } + + fn generate_contract_deploys_parts( + &mut self, + key: &ChunkProductionKey, + deploys: ChunkContractDeploys, + ) -> Result, Error> { + let validators = self.ordered_contract_deploys_validators(key)?; + // Note that target validators do not include the chunk producers, and thus in some case + // (eg. tests or small networks) there may be no other validators to send the new contracts to. + if validators.is_empty() { + return Ok(vec![]); + } + + let encoder = self.contract_deploys_encoder(validators.len()); + let (parts, encoded_length) = encoder.encode(&deploys); + let signer = self.my_validator_signer()?; + + Ok(validators + .into_iter() + .zip_eq(parts) + .enumerate() + .map(|(part_ord, (validator, part))| { + let partial_deploys = PartialEncodedContractDeploys::new( + key.clone(), + PartialEncodedContractDeploysPart { + part_ord, + data: part.unwrap().to_vec().into_boxed_slice(), + encoded_length, + }, + &signer, + ); + (validator, partial_deploys) + }) + .collect_vec()) + } + + // Break the state witness into parts and send each part to the corresponding chunk validator owner. + // The chunk validator owner will then forward the part to all other chunk validators. + // Each chunk validator would collect the parts and reconstruct the state witness. 
+ fn send_state_witness_parts( + &mut self, + epoch_id: EpochId, + chunk_header: &ShardChunkHeader, + witness_bytes: EncodedChunkStateWitness, + chunk_validators: &[AccountId], + signer: &ValidatorSigner, + ) { + // Capture these values first, as the sources are consumed before calling record_witness_sent. + let chunk_hash = chunk_header.chunk_hash(); + let witness_size_in_bytes = witness_bytes.size_bytes(); + + // Record time taken to encode the state witness parts. + let shard_id_label = chunk_header.shard_id().to_string(); + let encode_timer = metrics::PARTIAL_WITNESS_ENCODE_TIME + .with_label_values(&[shard_id_label.as_str()]) + .start_timer(); + let validator_witness_tuple = self.generate_state_witness_parts( + epoch_id, + chunk_header, + witness_bytes, + chunk_validators, + signer, + ); + encode_timer.observe_duration(); + + // Record the witness in order to match the incoming acks for measuring round-trip times. + // See process_chunk_state_witness_ack for the handling of the ack messages. + self.state_witness_tracker.record_witness_sent( + chunk_hash, + witness_size_in_bytes, + validator_witness_tuple.len(), + ); + + // Send the parts to the corresponding chunk validator owners. + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::PartialEncodedStateWitness(validator_witness_tuple), + )); + } + + /// Function to handle receiving partial_encoded_state_witness message from chunk producer. + async fn handle_partial_encoded_state_witness( + &mut self, + partial_witness: PartialEncodedStateWitness, + ) -> Result<(), Error> { + tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessMessage"); + let signer = self.my_validator_signer()?; + let epoch_manager = self.epoch_manager.clone(); + let runtime_adapter = self.runtime.clone(); + + let ChunkProductionKey { shard_id, epoch_id, height_created } = + partial_witness.chunk_production_key(); + + let chunk_producer = self + .epoch_manager + .get_chunk_producer_info(&ChunkProductionKey { epoch_id, height_created, shard_id })? + .take_account_id(); + + // Forward witness part to chunk validators except the validator that produced the chunk and witness. + let target_chunk_validators = self + .epoch_manager + .get_chunk_validator_assignments(&epoch_id, shard_id, height_created)? + .ordered_chunk_validators() + .into_iter() + .filter(|validator| validator != &chunk_producer) + .collect(); + + let pw_clone = partial_witness.clone(); + let validation = tokio::task::spawn_blocking(move || { + validate_partial_encoded_state_witness( + epoch_manager.as_ref(), + &pw_clone, + &signer, + runtime_adapter.store(), + ) + }) + .await + .expect("Failed to validate partial encoded state witness"); + + // Validate the partial encoded state witness and forward the part to all the chunk validators. + match validation { + Ok(true) => { + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::PartialEncodedStateWitnessForward( + target_chunk_validators, + partial_witness, + ), + )); + } + Ok(false) => { + tracing::warn!( + target: "client", + "Received invalid partial encoded state witness" + ); + } + Err(err) => { + tracing::warn!( + target: "client", + "Encountered error during validation: {}", + err + ); + } + } + + Ok(()) + } + + /// Function to handle receiving partial_encoded_state_witness_forward message from chunk producer. 
+ async fn handle_partial_encoded_state_witness_forward( + &mut self, + partial_witness: PartialEncodedStateWitness, + ) -> Result<(), Error> { + tracing::debug!(target: "client", ?partial_witness, "Receive PartialEncodedStateWitnessForwardMessage"); + + let signer = self.my_validator_signer()?; + let partial_witness_tracker = self.partial_witness_tracker.clone(); + let epoch_manager = self.epoch_manager.clone(); + let runtime_adapter = self.runtime.clone(); + self.partial_witness_spawner.spawn( + "handle_partial_encoded_state_witness_forward", + move || { + // Validate the partial encoded state witness and store the partial encoded state witness. + match validate_partial_encoded_state_witness( + epoch_manager.as_ref(), + &partial_witness, + &signer, + runtime_adapter.store(), + ) { + Ok(true) => { + if let Err(err) = partial_witness_tracker.store_partial_encoded_state_witness(partial_witness) { + tracing::error!(target: "client", "Failed to store partial encoded state witness: {}", err); + } + } + Ok(false) => { + tracing::warn!( + target: "client", + "Received invalid partial encoded state witness" + ); + } + Err(err) => { + tracing::warn!( + target: "client", + "Encountered error during validation: {}", + err + ); + } + } + }, + ); + + Ok(()) + } + + /// Handles partial contract deploy message received from a peer. + /// + /// This message may belong to one of two steps of distributing contract code. In the first step the code is compressed + /// and encoded into parts using Reed Solomon encoding and each part is sent to one of the validators (part owner). + /// See `send_chunk_contract_deploys_parts` for the code implementing this. In the second step each validator (part-owner) + /// forwards the part it receives to other validators. + async fn handle_partial_encoded_contract_deploys( + &mut self, + partial_deploys: PartialEncodedContractDeploys, + ) -> Result<(), Error> { + tracing::debug!(target: "client", ?partial_deploys, "Receive PartialEncodedContractDeploys"); + if !validate_partial_encoded_contract_deploys( + self.epoch_manager.as_ref(), + &partial_deploys, + self.runtime.store(), + )? { + return Ok(()); + } + if self.partial_deploys_tracker.already_processed(&partial_deploys) { + return Ok(()); + } + let key = partial_deploys.chunk_production_key().clone(); + let validators = self.ordered_contract_deploys_validators(&key)?; + if validators.is_empty() { + // Note that target validators do not include the chunk producers, and thus in some case + // (eg. tests or small networks) there may be no other validators to send the new contracts to. + // In such case, the message we are handling here should not be sent in the first place, + // unless there is a bug or adversarial behavior that sends the message. 
+ debug_assert!(false, "No target validators, we must not receive this message"); + return Ok(()); + } + + // Forward to other validators if the part received is my part + let signer = self.my_validator_signer()?; + let my_account_id = signer.validator_id(); + let Some(my_part_ord) = validators.iter().position(|validator| validator == my_account_id) + else { + tracing::warn!( + target: "client", + ?key, + "Validator is not a part of contract deploys distribution" + ); + return Ok(()); + }; + if partial_deploys.part().part_ord == my_part_ord { + let other_validators = validators + .iter() + .filter(|&validator| validator != my_account_id) + .cloned() + .collect_vec(); + if !other_validators.is_empty() { + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::PartialEncodedContractDeploys( + other_validators, + partial_deploys.clone(), + ), + )); + } + } + + // Store part + let encoder = self.contract_deploys_encoder(validators.len()); + if let Some(deploys) = self + .partial_deploys_tracker + .store_partial_encoded_contract_deploys(partial_deploys, encoder)? + { + let contracts = match deploys.decompress_contracts() { + Ok(contracts) => contracts, + Err(err) => { + tracing::warn!( + target: "client", + ?err, + ?key, + "Failed to decompress deployed contracts." + ); + return Ok(()); + } + }; + let contract_codes = contracts.into_iter().map(|contract| contract.into()).collect(); + let runtime = self.runtime.clone(); + self.compile_contracts_spawner.spawn("precompile_deployed_contracts", move || { + if let Err(err) = runtime.precompile_contracts(&key.epoch_id, contract_codes) { + tracing::error!( + target: "client", + ?err, + ?key, + "Failed to precompile deployed contracts." + ); + } + }); + } + + Ok(()) + } + + /// Handles the state witness ack message from the chunk validator. + /// It computes the round-trip time between sending the state witness and receiving + /// the ack message and updates the corresponding metric with it. + /// Currently we do not raise an error for handling of witness-ack messages, + /// as it is used only for tracking some networking metrics. + async fn handle_chunk_state_witness_ack(&mut self, witness_ack: ChunkStateWitnessAck) { + self.state_witness_tracker.on_witness_ack_received(witness_ack); + } + + /// Handles contract code accesses message from chunk producer. + /// This is sent in parallel to a chunk state witness and contains the hashes + /// of the contract code accessed when applying the previous chunk of the witness. + async fn handle_chunk_contract_accesses( + &mut self, + accesses: ChunkContractAccesses, + ) -> Result<(), Error> { + let signer = self.my_validator_signer()?; + if !validate_chunk_contract_accesses( + self.epoch_manager.as_ref(), + &accesses, + &signer, + self.runtime.store(), + )? 
{ + return Ok(()); + } + let key = accesses.chunk_production_key(); + let contracts_cache = self.runtime.compiled_contract_cache(); + let runtime_config = self + .runtime + .get_runtime_config(self.epoch_manager.get_epoch_protocol_version(&key.epoch_id)?)?; + let missing_contract_hashes = HashSet::from_iter( + accesses + .contracts() + .iter() + .filter(|&hash| { + !contracts_cache_contains_contract(contracts_cache, hash, &runtime_config) + }) + .cloned(), + ); + if missing_contract_hashes.is_empty() { + return Ok(()); + } + self.partial_witness_tracker + .store_accessed_contract_hashes(key.clone(), missing_contract_hashes.clone())?; + let random_chunk_producer = { + let mut chunk_producers = self + .epoch_manager + .get_epoch_chunk_producers_for_shard(&key.epoch_id, key.shard_id)?; + chunk_producers.swap_remove(rand::thread_rng().gen_range(0..chunk_producers.len())) + }; + let request = ContractCodeRequest::new( + key.clone(), + missing_contract_hashes, + accesses.main_transition().clone(), + &signer, + ); + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::ContractCodeRequest(random_chunk_producer, request), + )); + Ok(()) + } + + /// Sends the contract accesses to the same chunk validators + /// (except for the chunk producers that track the same shard), + /// which will receive the state witness for the new chunk. + fn send_contract_accesses_to_chunk_validators( + &self, + key: ChunkProductionKey, + contract_accesses: HashSet, + main_transition: MainTransitionKey, + chunk_validators: &[AccountId], + my_signer: &ValidatorSigner, + ) { + let chunk_producers: HashSet = self + .epoch_manager + .get_epoch_chunk_producers_for_shard(&key.epoch_id, key.shard_id) + .expect("Chunk producers must be defined") + .into_iter() + .collect(); + + // Exclude chunk producers that track the same shard from the target list, since they track the state that contains the respective code. + let target_chunk_validators = chunk_validators + .iter() + .filter(|validator| !chunk_producers.contains(*validator)) + .cloned() + .collect(); + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::ChunkContractAccesses( + target_chunk_validators, + ChunkContractAccesses::new(key, contract_accesses, main_transition, my_signer), + ), + )); + } + + /// Retrieves the code for the given contract hashes and distributes them to validator in parts. + /// + /// This implements the first step of distributing contract code to validators where the contract codes + /// are compressed and encoded into parts using Reed Solomon encoding, and then each part is sent to + /// one of the validators (part-owner). Second step of the distribution, where each validator (part-owner) + /// forwards the part it receives is implemented in `handle_partial_encoded_contract_deploys`. + fn send_chunk_contract_deploys_parts( + &mut self, + key: ChunkProductionKey, + contract_codes: Vec, + ) -> Result<(), Error> { + let contracts = contract_codes.into_iter().map(|contract| contract.into()).collect(); + let compressed_deploys = ChunkContractDeploys::compress_contracts(&contracts)?; + let validator_parts = self.generate_contract_deploys_parts(&key, compressed_deploys)?; + for (part_owner, deploys_part) in validator_parts.into_iter() { + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::PartialEncodedContractDeploys(vec![part_owner], deploys_part), + )); + } + Ok(()) + } + + /// Handles contract code requests message from chunk validators. 
+ /// As response to this message, sends the contract code requested to + /// the requesting chunk validator for the given hashes of the contract code. + async fn handle_contract_code_request( + &mut self, + request: ContractCodeRequest, + ) -> Result<(), Error> { + if !validate_contract_code_request( + self.epoch_manager.as_ref(), + &request, + self.runtime.store(), + )? { + return Ok(()); + } + + let key = request.chunk_production_key(); + let processed_requests_key = (key.clone(), request.requester().clone()); + if self.processed_contract_code_requests.contains(&processed_requests_key) { + tracing::warn!( + target: "client", + ?processed_requests_key, + "Contract code request from this account was already processed" + ); + return Ok(()); + } + self.processed_contract_code_requests.push(processed_requests_key, ()); + + let _timer = near_chain::stateless_validation::metrics::PROCESS_CONTRACT_CODE_REQUEST_TIME + .with_label_values(&[&key.shard_id.to_string()]) + .start_timer(); + + let main_transition_key = request.main_transition(); + let Some(transition_data) = + self.runtime.store().get_ser::( + DBCol::StateTransitionData, + &near_primitives::utils::get_block_shard_id( + &main_transition_key.block_hash, + main_transition_key.shard_id, + ), + )? + else { + tracing::warn!( + target: "client", + ?key, + ?main_transition_key, + "Missing state transition data" + ); + return Ok(()); + }; + let valid_accesses: HashSet = + transition_data.contract_accesses().iter().cloned().collect(); + + let storage = TrieDBStorage::new( + TrieStoreAdapter::new(self.runtime.store().clone()), + self.epoch_manager.shard_id_to_uid( + main_transition_key.shard_id, + &self.epoch_manager.get_epoch_id(&main_transition_key.block_hash)?, + )?, + ); + let mut contracts = Vec::new(); + for contract_hash in request.contracts() { + if !valid_accesses.contains(contract_hash) { + tracing::warn!( + target: "client", + ?key, + ?contract_hash, + "Requested contract code was not accessed when applying the chunk" + ); + return Ok(()); + } + match storage.retrieve_raw_bytes(&contract_hash.0) { + Ok(bytes) => contracts.push(CodeBytes(bytes)), + Err(StorageError::MissingTrieValue(_, _)) => { + tracing::warn!( + target: "client", + ?contract_hash, + chunk_production_key = ?key, + "Requested contract hash is not present in the storage" + ); + return Ok(()); + } + Err(err) => return Err(err.into()), + } + } + let response = ContractCodeResponse::encode(key.clone(), &contracts)?; + self.network_adapter.send(PeerManagerMessageRequest::NetworkRequests( + NetworkRequests::ContractCodeResponse(request.requester().clone(), response), + )); + Ok(()) + } + + /// Handles contract code responses message from chunk producer. 
+ async fn handle_contract_code_response( + &mut self, + response: ContractCodeResponse, + ) -> Result<(), Error> { + let key = response.chunk_production_key().clone(); + let contracts = response.decompress_contracts()?; + self.partial_witness_tracker.store_accessed_contract_codes(key, contracts) + } + + fn my_validator_signer(&self) -> Result, Error> { + self.my_signer.get().ok_or_else(|| Error::NotAValidator("not a validator".to_owned())) + } + + fn contract_deploys_encoder(&mut self, validators_count: usize) -> Arc { + self.contract_deploys_encoders.entry(validators_count) + } + + fn ordered_contract_deploys_validators( + &mut self, + key: &ChunkProductionKey, + ) -> Result, Error> { + let chunk_producers = HashSet::::from_iter( + self.epoch_manager.get_epoch_chunk_producers_for_shard(&key.epoch_id, key.shard_id)?, + ); + let mut validators = self + .epoch_manager + .get_epoch_all_validators(&key.epoch_id)? + .into_iter() + .filter(|stake| !chunk_producers.contains(stake.account_id())) + .map(|stake| stake.account_id().clone()) + .collect::>(); + validators.sort(); + Ok(validators) + } +} + +fn compress_witness(witness: &ChunkStateWitness) -> Result { + let shard_id_label = witness.chunk_header.shard_id().to_string(); + let encode_timer = near_chain::stateless_validation::metrics::CHUNK_STATE_WITNESS_ENCODE_TIME + .with_label_values(&[shard_id_label.as_str()]) + .start_timer(); + let (witness_bytes, raw_witness_size) = EncodedChunkStateWitness::encode(witness)?; + encode_timer.observe_duration(); + + near_chain::stateless_validation::metrics::record_witness_size_metrics( + raw_witness_size, + witness_bytes.size_bytes(), + witness, + ); + Ok(witness_bytes) +} + +fn contracts_cache_contains_contract( + cache: &dyn ContractRuntimeCache, + contract_hash: &CodeHash, + runtime_config: &RuntimeConfig, +) -> bool { + let cache_key = get_contract_cache_key(contract_hash.0, &runtime_config.wasm_config); + cache.memory_cache().contains(cache_key) || cache.has(&cache_key).is_ok_and(|has| has) +} From 76280aa4fb63180c287f06ced463781f44c00539 Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Tue, 7 Jan 2025 15:58:58 +0200 Subject: [PATCH 10/13] . 
--- .../partial_witness/partial_witness_actor.rs | 3 +++ .../partial_witness/partial_witness_actor_v2.rs | 5 +++-- chain/client/src/test_utils/setup.rs | 3 +++ integration-tests/src/test_loop/builder.rs | 3 +++ integration-tests/src/tests/network/runner.rs | 2 ++ nearcore/src/lib.rs | 6 ++++++ 6 files changed, 20 insertions(+), 2 deletions(-) diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs index 719fa2834a5..85df8abed9e 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs @@ -16,6 +16,7 @@ use near_primitives::stateless_validation::contract_distribution::ContractUpdate use near_primitives::stateless_validation::state_witness::ChunkStateWitness; use near_primitives::types::ShardId; use std::sync::Arc; +use tokio::runtime::Handle; use crate::client_actor::ClientSenderForPartialWitness; @@ -119,6 +120,7 @@ impl Handler for PartialWitnessActor { impl PartialWitnessActor { pub fn new( + rt: Handle, clock: Clock, network_adapter: PeerManagerAdapter, client_sender: ClientSenderForPartialWitness, @@ -129,6 +131,7 @@ impl PartialWitnessActor { partial_witness_spawner: Arc, ) -> Self { let tx = PartialWitnessService::new( + rt, clock, network_adapter, client_sender, diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs index ff3e326e505..8dd97eb1e2a 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs @@ -38,6 +38,7 @@ use near_store::adapter::trie_store::TrieStoreAdapter; use near_store::{DBCol, StorageError, TrieDBStorage, TrieStorage}; use near_vm_runner::{get_contract_cache_key, ContractCode, ContractRuntimeCache}; use rand::Rng; +use tokio::runtime::Handle; use crate::client_actor::ClientSenderForPartialWitness; use crate::stateless_validation::state_witness_tracker::ChunkStateWitnessTracker; @@ -110,6 +111,7 @@ pub struct PartialWitnessSenderForClient { impl PartialWitnessService { pub fn new( + rt: Handle, clock: Clock, network_adapter: PeerManagerAdapter, client_sender: ClientSenderForPartialWitness, @@ -144,7 +146,7 @@ impl PartialWitnessService { ), }; - tokio::spawn(async move { + rt.spawn(async move { actor.run().await.expect("Failed to run PartialWitnessActor"); }); @@ -155,7 +157,6 @@ impl PartialWitnessService { /// Main async loop processing all incoming PartialWitnessMsg. 
pub async fn run(mut self) -> Result<(), Error> { while let Some(msg) = self.rx.recv().await { - // Match on the enum variant and dispatch the appropriate handler: match msg { PartialWitnessMsg::DistributeStateWitnessRequest(req) => { if let Err(err) = self.handle_distribute_state_witness_request(req).await { diff --git a/chain/client/src/test_utils/setup.rs b/chain/client/src/test_utils/setup.rs index 37422fd7597..adea5091579 100644 --- a/chain/client/src/test_utils/setup.rs +++ b/chain/client/src/test_utils/setup.rs @@ -74,6 +74,7 @@ use std::cmp::max; use std::collections::{HashMap, HashSet}; use std::ops::DerefMut; use std::sync::{Arc, RwLock}; +use tokio::runtime::Runtime; pub const TEST_SEED: RngSeed = [3; 32]; @@ -156,7 +157,9 @@ pub fn setup( ); let client_adapter_for_partial_witness_actor = LateBoundSender::new(); + let networking_rt = Runtime::new().unwrap(); let (partial_witness_addr, _) = spawn_actix_actor(PartialWitnessActor::new( + networking_rt.handle().clone(), clock.clone(), network_adapter.clone(), client_adapter_for_partial_witness_actor.as_multi_sender(), diff --git a/integration-tests/src/test_loop/builder.rs b/integration-tests/src/test_loop/builder.rs index aabffeaff05..9a7d742777b 100644 --- a/integration-tests/src/test_loop/builder.rs +++ b/integration-tests/src/test_loop/builder.rs @@ -42,6 +42,7 @@ use near_store::{Store, StoreConfig, TrieConfig}; use near_vm_runner::logic::ProtocolVersion; use near_vm_runner::{ContractRuntimeCache, FilesystemContractRuntimeCache}; use nearcore::state_sync::StateSyncDumper; +use tokio::runtime::Runtime; use super::env::{ClientToShardsManagerSender, TestData, TestLoopChunksStorage, TestLoopEnv}; use super::utils::network::{chunk_endorsement_dropper, chunk_endorsement_dropper_by_hash}; @@ -719,7 +720,9 @@ impl TestLoopBuilder { ) .unwrap(); + let networking_rt = Runtime::new().unwrap(); let partial_witness_actor = PartialWitnessActor::new( + networking_rt.handle().clone(), self.test_loop.clock(), network_adapter.as_multi_sender(), client_adapter.as_multi_sender(), diff --git a/integration-tests/src/tests/network/runner.rs b/integration-tests/src/tests/network/runner.rs index a6fe267d014..530bfbead7e 100644 --- a/integration-tests/src/tests/network/runner.rs +++ b/integration-tests/src/tests/network/runner.rs @@ -140,7 +140,9 @@ fn setup_network_node( runtime.store().clone(), client_config.chunk_request_retry_period, ); + let networking_rt = tokio::runtime::Builder::new_current_thread().enable_all().build()?; let (partial_witness_actor, _) = spawn_actix_actor(PartialWitnessActor::new( + networking_rt.handle().clone(), Clock::real(), network_adapter.as_multi_sender(), client_actor.clone().with_auto_span_context().into_multi_sender(), diff --git a/nearcore/src/lib.rs b/nearcore/src/lib.rs index acd35c35cc1..4e30312753c 100644 --- a/nearcore/src/lib.rs +++ b/nearcore/src/lib.rs @@ -227,6 +227,8 @@ pub struct NearNode { pub state_sync_runtime: Arc, /// Shard tracker, allows querying of which shards are tracked by this node. pub shard_tracker: ShardTracker, + // The threads that the networking layer runs in. 
+ _networking_rt: tokio::runtime::Runtime, } pub fn start_with_config(home_dir: &Path, config: NearConfig) -> anyhow::Result { @@ -363,8 +365,11 @@ pub fn start_with_config_and_synchronization( ); let snapshot_callbacks = SnapshotCallbacks { make_snapshot_callback, delete_snapshot_callback }; + let networking_rt = tokio::runtime::Builder::new_multi_thread().enable_all().build().unwrap(); + let (partial_witness_actor, partial_witness_arbiter) = spawn_actix_actor(PartialWitnessActor::new( + networking_rt.handle().clone(), Clock::real(), network_adapter.as_multi_sender(), client_adapter_for_partial_witness_actor.as_multi_sender(), @@ -517,5 +522,6 @@ pub fn start_with_config_and_synchronization( resharding_handle, state_sync_runtime, shard_tracker, + _networking_rt: networking_rt, }) } From 13f7858628136661470de536e0f8e14e3f5a6312 Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Tue, 7 Jan 2025 16:07:00 +0200 Subject: [PATCH 11/13] fix --- integration-tests/src/tests/network/runner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-tests/src/tests/network/runner.rs b/integration-tests/src/tests/network/runner.rs index 530bfbead7e..b7d7aa05b51 100644 --- a/integration-tests/src/tests/network/runner.rs +++ b/integration-tests/src/tests/network/runner.rs @@ -140,7 +140,7 @@ fn setup_network_node( runtime.store().clone(), client_config.chunk_request_retry_period, ); - let networking_rt = tokio::runtime::Builder::new_current_thread().enable_all().build()?; + let networking_rt = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap(); let (partial_witness_actor, _) = spawn_actix_actor(PartialWitnessActor::new( networking_rt.handle().clone(), Clock::real(), From b66e10532ed406fdae33975a5f93e02814d85e30 Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Wed, 8 Jan 2025 11:20:09 +0200 Subject: [PATCH 12/13] use TokioRuntimeFutureSpawner and get rid of actix::spawn --- .../partial_witness/partial_witness_actor.rs | 47 ++++--------------- .../partial_witness_actor_v2.rs | 21 +++++---- chain/client/src/test_utils/setup.rs | 8 ++-- integration-tests/src/test_loop/builder.rs | 8 ++-- integration-tests/src/tests/network/runner.rs | 7 +-- nearcore/src/lib.rs | 10 ++-- 6 files changed, 41 insertions(+), 60 deletions(-) diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs index 85df8abed9e..5f649038ad8 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor.rs @@ -1,4 +1,4 @@ -use near_async::futures::AsyncComputationSpawner; +use near_async::futures::{AsyncComputationSpawner, TokioRuntimeFutureSpawner}; use near_async::messaging::{Actor, Handler, Sender}; use near_async::time::Clock; use near_async::{MultiSend, MultiSenderFrom}; @@ -16,7 +16,6 @@ use near_primitives::stateless_validation::contract_distribution::ContractUpdate use near_primitives::stateless_validation::state_witness::ChunkStateWitness; use near_primitives::types::ShardId; use std::sync::Arc; -use tokio::runtime::Handle; use crate::client_actor::ClientSenderForPartialWitness; @@ -46,81 +45,55 @@ pub struct PartialWitnessSenderForClient { impl Handler for PartialWitnessActor { #[perf] fn handle(&mut self, msg: DistributeStateWitnessRequest) { - let tx = self.tx.clone(); - actix::spawn(async move { - 
tx.send(PartialWitnessMsg::DistributeStateWitnessRequest(Box::new(msg))).await.unwrap(); - }); + self.tx.send(PartialWitnessMsg::DistributeStateWitnessRequest(Box::new(msg))).unwrap(); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: ChunkStateWitnessAckMessage) { - let tx = self.tx.clone(); - actix::spawn(async move { - tx.send(PartialWitnessMsg::ChunkStateWitnessAckMessage(msg)).await.unwrap(); - }); + self.tx.send(PartialWitnessMsg::ChunkStateWitnessAckMessage(msg)).unwrap(); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: PartialEncodedStateWitnessMessage) { - let tx = self.tx.clone(); - actix::spawn(async move { - tx.send(PartialWitnessMsg::PartialEncodedStateWitnessMessage(msg)).await.unwrap(); - }); + self.tx.send(PartialWitnessMsg::PartialEncodedStateWitnessMessage(msg)).unwrap(); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: PartialEncodedStateWitnessForwardMessage) { - let tx = self.tx.clone(); - actix::spawn(async move { - tx.send(PartialWitnessMsg::PartialEncodedStateWitnessForwardMessage(msg)) - .await - .unwrap(); - }); + self.tx.send(PartialWitnessMsg::PartialEncodedStateWitnessForwardMessage(msg)).unwrap(); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: ChunkContractAccessesMessage) { - let tx = self.tx.clone(); - actix::spawn(async move { - tx.send(PartialWitnessMsg::ChunkContractAccessesMessage(msg)).await.unwrap(); - }); + self.tx.send(PartialWitnessMsg::ChunkContractAccessesMessage(msg)).unwrap(); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: PartialEncodedContractDeploysMessage) { - let tx = self.tx.clone(); - actix::spawn(async move { - tx.send(PartialWitnessMsg::PartialEncodedContractDeploysMessage(msg)).await.unwrap(); - }); + self.tx.send(PartialWitnessMsg::PartialEncodedContractDeploysMessage(msg)).unwrap(); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: ContractCodeRequestMessage) { - let tx = self.tx.clone(); - actix::spawn(async move { - tx.send(PartialWitnessMsg::ContractCodeRequestMessage(msg)).await.unwrap(); - }); + self.tx.send(PartialWitnessMsg::ContractCodeRequestMessage(msg)).unwrap(); } } impl Handler for PartialWitnessActor { fn handle(&mut self, msg: ContractCodeResponseMessage) { - let tx = self.tx.clone(); - actix::spawn(async move { - tx.send(PartialWitnessMsg::ContractCodeResponseMessage(msg)).await.unwrap(); - }); + self.tx.send(PartialWitnessMsg::ContractCodeResponseMessage(msg)).unwrap(); } } impl PartialWitnessActor { pub fn new( - rt: Handle, + rt: Arc, clock: Clock, network_adapter: PeerManagerAdapter, client_sender: ClientSenderForPartialWitness, diff --git a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs index 8dd97eb1e2a..0e087e319e0 100644 --- a/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs +++ b/chain/client/src/stateless_validation/partial_witness/partial_witness_actor_v2.rs @@ -4,7 +4,10 @@ use std::sync::Arc; use itertools::Itertools; use lru::LruCache; -use near_async::futures::{AsyncComputationSpawner, AsyncComputationSpawnerExt}; +use near_async::futures::{ + AsyncComputationSpawner, AsyncComputationSpawnerExt, FutureSpawnerExt, + TokioRuntimeFutureSpawner, +}; use near_async::messaging::{CanSend, Sender}; use near_async::time::Clock; use near_async::{MultiSend, MultiSenderFrom}; @@ -38,7 +41,6 @@ use 
near_store::adapter::trie_store::TrieStoreAdapter; use near_store::{DBCol, StorageError, TrieDBStorage, TrieStorage}; use near_vm_runner::{get_contract_cache_key, ContractCode, ContractRuntimeCache}; use rand::Rng; -use tokio::runtime::Handle; use crate::client_actor::ClientSenderForPartialWitness; use crate::stateless_validation::state_witness_tracker::ChunkStateWitnessTracker; @@ -52,7 +54,7 @@ use super::encoding::{CONTRACT_DEPLOYS_RATIO_DATA_PARTS, WITNESS_RATIO_DATA_PART use super::partial_deploys_tracker::PartialEncodedContractDeploysTracker; use super::partial_witness_tracker::PartialEncodedStateWitnessTracker; use near_primitives::utils::compression::CompressedData; -use tokio::sync::mpsc::{self, error::SendError, Receiver, Sender as MpscSender}; +use std::sync::mpsc::{self, Receiver, SendError, Sender as MpscSender}; const PROCESSED_CONTRACT_CODE_REQUESTS_CACHE_SIZE: usize = 30; @@ -73,8 +75,9 @@ pub struct PartialWitnessSender(MpscSender); impl PartialWitnessSender { /// Send a message to the Partial Witness Service (async). - pub async fn send(&self, msg: PartialWitnessMsg) -> Result<(), SendError> { - self.0.send(msg).await + #[allow(clippy::result_large_err)] + pub fn send(&self, msg: PartialWitnessMsg) -> Result<(), SendError> { + self.0.send(msg) } } @@ -111,7 +114,7 @@ pub struct PartialWitnessSenderForClient { impl PartialWitnessService { pub fn new( - rt: Handle, + rt: Arc, clock: Clock, network_adapter: PeerManagerAdapter, client_sender: ClientSenderForPartialWitness, @@ -121,7 +124,7 @@ impl PartialWitnessService { compile_contracts_spawner: Arc, partial_witness_spawner: Arc, ) -> PartialWitnessSender { - let (tx, rx) = mpsc::channel(1024); + let (tx, rx) = mpsc::channel(); let partial_witness_tracker = Arc::new(PartialEncodedStateWitnessTracker::new(client_sender, epoch_manager.clone())); @@ -146,7 +149,7 @@ impl PartialWitnessService { ), }; - rt.spawn(async move { + rt.spawn("PartialWitnessService", async move { actor.run().await.expect("Failed to run PartialWitnessActor"); }); @@ -156,7 +159,7 @@ impl PartialWitnessService { /// Main async loop processing all incoming PartialWitnessMsg. 
     pub async fn run(mut self) -> Result<(), Error> {
-        while let Some(msg) = self.rx.recv().await {
+        while let Ok(msg) = self.rx.recv() {
             match msg {
                 PartialWitnessMsg::DistributeStateWitnessRequest(req) => {
                     if let Err(err) = self.handle_distribute_state_witness_request(req).await {
diff --git a/chain/client/src/test_utils/setup.rs b/chain/client/src/test_utils/setup.rs
index adea5091579..35c8db5f5de 100644
--- a/chain/client/src/test_utils/setup.rs
+++ b/chain/client/src/test_utils/setup.rs
@@ -16,7 +16,7 @@ use actix::{Actor, Addr, Context};
 use futures::{future, FutureExt};
 use near_async::actix::AddrWithAutoSpanContextExt;
 use near_async::actix_wrapper::{spawn_actix_actor, ActixWrapper};
-use near_async::futures::ActixFutureSpawner;
+use near_async::futures::{ActixFutureSpawner, TokioRuntimeFutureSpawner};
 use near_async::messaging::{
     noop, CanSend, IntoMultiSender, IntoSender, LateBoundSender, SendAsync, Sender,
 };
@@ -74,7 +74,6 @@ use std::cmp::max;
 use std::collections::{HashMap, HashSet};
 use std::ops::DerefMut;
 use std::sync::{Arc, RwLock};
-use tokio::runtime::Runtime;
 
 pub const TEST_SEED: RngSeed = [3; 32];
 
@@ -157,9 +156,10 @@ pub fn setup(
     );
 
     let client_adapter_for_partial_witness_actor = LateBoundSender::new();
-    let networking_rt = Runtime::new().unwrap();
+    let networking_rt = Arc::new(tokio::runtime::Builder::new_current_thread().build().unwrap());
+    let networking_spawner = Arc::new(TokioRuntimeFutureSpawner(networking_rt));
     let (partial_witness_addr, _) = spawn_actix_actor(PartialWitnessActor::new(
-        networking_rt.handle().clone(),
+        networking_spawner,
         clock.clone(),
         network_adapter.clone(),
         client_adapter_for_partial_witness_actor.as_multi_sender(),
diff --git a/integration-tests/src/test_loop/builder.rs b/integration-tests/src/test_loop/builder.rs
index 9a7d742777b..4f04b24994b 100644
--- a/integration-tests/src/test_loop/builder.rs
+++ b/integration-tests/src/test_loop/builder.rs
@@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet};
 use std::sync::{Arc, Mutex};
 
 use tempfile::TempDir;
-use near_async::futures::FutureSpawner;
+use near_async::futures::{FutureSpawner, TokioRuntimeFutureSpawner};
 use near_async::messaging::{noop, IntoMultiSender, IntoSender, LateBoundSender};
 use near_async::test_loop::sender::TestLoopSender;
 use near_async::test_loop::TestLoopV2;
@@ -720,9 +720,11 @@ impl TestLoopBuilder {
         )
         .unwrap();
 
-        let networking_rt = Runtime::new().unwrap();
+        let networking_rt =
+            Arc::new(tokio::runtime::Builder::new_current_thread().build().unwrap());
+        let networking_spawner = Arc::new(TokioRuntimeFutureSpawner(networking_rt.clone()));
         let partial_witness_actor = PartialWitnessActor::new(
-            networking_rt.handle().clone(),
+            networking_spawner,
             self.test_loop.clock(),
             network_adapter.as_multi_sender(),
             client_adapter.as_multi_sender(),
diff --git a/integration-tests/src/tests/network/runner.rs b/integration-tests/src/tests/network/runner.rs
index b7d7aa05b51..654e7832eb5 100644
--- a/integration-tests/src/tests/network/runner.rs
+++ b/integration-tests/src/tests/network/runner.rs
@@ -2,7 +2,7 @@ use actix::{Actor, Addr};
 use anyhow::{anyhow, bail, Context};
 use near_async::actix::AddrWithAutoSpanContextExt;
 use near_async::actix_wrapper::{spawn_actix_actor, ActixWrapper};
-use near_async::futures::ActixFutureSpawner;
+use near_async::futures::{ActixFutureSpawner, TokioRuntimeFutureSpawner};
 use near_async::messaging::{noop, IntoMultiSender, IntoSender, LateBoundSender};
 use near_async::time::{self, Clock};
 use near_chain::rayon_spawner::RayonAsyncComputationSpawner;
@@ -140,9 +140,10 @@ fn setup_network_node(
         runtime.store().clone(),
         client_config.chunk_request_retry_period,
     );
-    let networking_rt = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap();
+    let networking_rt = Arc::new(tokio::runtime::Builder::new_current_thread().build().unwrap());
+    let networking_spawner = Arc::new(TokioRuntimeFutureSpawner(networking_rt.clone()));
     let (partial_witness_actor, _) = spawn_actix_actor(PartialWitnessActor::new(
-        networking_rt.handle().clone(),
+        networking_spawner,
         Clock::real(),
         network_adapter.as_multi_sender(),
         client_actor.clone().with_auto_span_context().into_multi_sender(),
diff --git a/nearcore/src/lib.rs b/nearcore/src/lib.rs
index 4e30312753c..9e28d301cdc 100644
--- a/nearcore/src/lib.rs
+++ b/nearcore/src/lib.rs
@@ -228,7 +228,7 @@ pub struct NearNode {
     /// Shard tracker, allows querying of which shards are tracked by this node.
     pub shard_tracker: ShardTracker,
     // The threads that the networking layer runs in.
-    _networking_rt: tokio::runtime::Runtime,
+    pub networking_rt: Arc<tokio::runtime::Runtime>,
 }
 
 pub fn start_with_config(home_dir: &Path, config: NearConfig) -> anyhow::Result<NearNode> {
@@ -365,11 +365,13 @@ pub fn start_with_config_and_synchronization(
     );
     let snapshot_callbacks = SnapshotCallbacks { make_snapshot_callback, delete_snapshot_callback };
 
-    let networking_rt = tokio::runtime::Builder::new_multi_thread().enable_all().build().unwrap();
+    let networking_rt =
+        Arc::new(tokio::runtime::Builder::new_multi_thread().enable_all().build().unwrap());
+    let networking_spawner = Arc::new(TokioRuntimeFutureSpawner(networking_rt.clone()));
 
     let (partial_witness_actor, partial_witness_arbiter) =
         spawn_actix_actor(PartialWitnessActor::new(
-            networking_rt.handle().clone(),
+            networking_spawner,
             Clock::real(),
             network_adapter.as_multi_sender(),
             client_adapter_for_partial_witness_actor.as_multi_sender(),
@@ -522,6 +524,6 @@ pub fn start_with_config_and_synchronization(
         resharding_handle,
         state_sync_runtime,
         shard_tracker,
-        _networking_rt: networking_rt,
+        networking_rt,
     })
 }

From 560efdcd71b7070b5b48c19c9991a84da2b998a2 Mon Sep 17 00:00:00 2001
From: Stefan Neamtu
Date: Wed, 8 Jan 2025 11:37:05 +0200
Subject: [PATCH 13/13] clippy

---
 integration-tests/src/test_loop/builder.rs    | 3 +--
 integration-tests/src/tests/network/runner.rs | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/integration-tests/src/test_loop/builder.rs b/integration-tests/src/test_loop/builder.rs
index 4f04b24994b..fb6136de4b8 100644
--- a/integration-tests/src/test_loop/builder.rs
+++ b/integration-tests/src/test_loop/builder.rs
@@ -42,7 +42,6 @@ use near_store::{Store, StoreConfig, TrieConfig};
 use near_vm_runner::logic::ProtocolVersion;
 use near_vm_runner::{ContractRuntimeCache, FilesystemContractRuntimeCache};
 use nearcore::state_sync::StateSyncDumper;
-use tokio::runtime::Runtime;
 
 use super::env::{ClientToShardsManagerSender, TestData, TestLoopChunksStorage, TestLoopEnv};
 use super::utils::network::{chunk_endorsement_dropper, chunk_endorsement_dropper_by_hash};
@@ -722,7 +721,7 @@ impl TestLoopBuilder {
 
         let networking_rt =
             Arc::new(tokio::runtime::Builder::new_current_thread().build().unwrap());
-        let networking_spawner = Arc::new(TokioRuntimeFutureSpawner(networking_rt.clone()));
+        let networking_spawner = Arc::new(TokioRuntimeFutureSpawner(networking_rt));
         let partial_witness_actor = PartialWitnessActor::new(
             networking_spawner,
             self.test_loop.clock(),
diff --git a/integration-tests/src/tests/network/runner.rs b/integration-tests/src/tests/network/runner.rs
index 654e7832eb5..3138a5dc90d 100644
--- a/integration-tests/src/tests/network/runner.rs
+++ b/integration-tests/src/tests/network/runner.rs
@@ -141,7 +141,7 @@ fn setup_network_node(
         client_config.chunk_request_retry_period,
     );
     let networking_rt = Arc::new(tokio::runtime::Builder::new_current_thread().build().unwrap());
-    let networking_spawner = Arc::new(TokioRuntimeFutureSpawner(networking_rt.clone()));
+    let networking_spawner = Arc::new(TokioRuntimeFutureSpawner(networking_rt));
     let (partial_witness_actor, _) = spawn_actix_actor(PartialWitnessActor::new(
         networking_spawner,
         Clock::real(),