Skip to content

Commit

Permalink
Crash recovery (#938)
Browse files Browse the repository at this point in the history
* WIP crash recovery

* Implement bonsai host crash recovery

* Store session ids by l1 height & get rid of latest proof l1 hash

* Lint

* Nits

* Remove no-std breaking function from zkvmhost trait

* Fmt

* Update session storage logic

* Lint

* Clear pending sessions in prover service

* Use l1 height data from proof data for recovered proofs

* Naming

* Lint

* Fix merge bug
  • Loading branch information
ercecan authored Aug 14, 2024
1 parent 66a1a19 commit a680783
Show file tree
Hide file tree
Showing 17 changed files with 502 additions and 202 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions bin/citrea/src/rollup/bitcoin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,13 @@ impl RollupBlueprint for BitcoinRollup {
prover_config: ProverConfig,
_rollup_config: &FullNodeConfig<Self::DaConfig>,
_da_service: &Arc<Self::DaService>,
ledger_db: LedgerDB,
) -> Self::ProverService {
let vm = Risc0BonsaiHost::new(
citrea_risc0::BITCOIN_DA_ELF,
std::env::var("BONSAI_API_URL").unwrap_or("".to_string()),
std::env::var("BONSAI_API_KEY").unwrap_or("".to_string()),
ledger_db.clone(),
);
let zk_stf = StfBlueprint::new();
let zk_storage = ZkStorage::new();
Expand All @@ -155,6 +157,7 @@ impl RollupBlueprint for BitcoinRollup {
da_verifier,
prover_config,
zk_storage,
ledger_db,
)
.expect("Should be able to instantiate prover service")
}
Expand Down
3 changes: 3 additions & 0 deletions bin/citrea/src/rollup/mock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,13 @@ impl RollupBlueprint for MockDemoRollup {
prover_config: ProverConfig,
_rollup_config: &FullNodeConfig<Self::DaConfig>,
_da_service: &Arc<Self::DaService>,
ledger_db: LedgerDB,
) -> Self::ProverService {
let vm = Risc0BonsaiHost::new(
citrea_risc0::MOCK_DA_ELF,
std::env::var("BONSAI_API_URL").unwrap_or("".to_string()),
std::env::var("BONSAI_API_KEY").unwrap_or("".to_string()),
ledger_db.clone(),
);
let zk_stf = StfBlueprint::new();
let zk_storage = ZkStorage::new();
Expand All @@ -116,6 +118,7 @@ impl RollupBlueprint for MockDemoRollup {
da_verifier,
prover_config,
zk_storage,
ledger_db,
)
.expect("Should be able to instantiate prover service")
}
Expand Down
10 changes: 8 additions & 2 deletions bin/citrea/src/rollup/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,15 +230,21 @@ pub trait CitreaRollupBlueprint: RollupBlueprint {
{
let da_service = self.create_da_service(&rollup_config).await?;

let ledger_db = self.create_ledger_db(&rollup_config);

let prover_service = self
.create_prover_service(prover_config.clone(), &rollup_config, &da_service)
.create_prover_service(
prover_config.clone(),
&rollup_config,
&da_service,
ledger_db.clone(),
)
.await;

// TODO: Double check what kind of storage is needed here.
// Maybe whole "prev_root" can be initialized inside runner
// Getting block here, so prover_service doesn't have to be `Send`

let ledger_db = self.create_ledger_db(&rollup_config);
let genesis_config = self.create_genesis_config(runtime_genesis_paths, &rollup_config)?;

let mut storage_manager = self.create_storage_manager(&rollup_config)?;
Expand Down
30 changes: 30 additions & 0 deletions crates/prover/src/prover_service/parallel/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use citrea_stf::verifier::StateTransitionVerifier;
use prover::Prover;
use serde::de::DeserializeOwned;
use serde::Serialize;
use sov_db::ledger_db::{LedgerDB, ProvingServiceLedgerOps};
use sov_rollup_interface::da::{DaData, DaSpec};
use sov_rollup_interface::services::da::DaService;
use sov_rollup_interface::stf::StateTransitionFunction;
Expand Down Expand Up @@ -34,6 +35,7 @@ where

zk_storage: V::PreState,
prover_state: Prover<StateRoot, Witness, Da>,
ledger_db: LedgerDB,
}

impl<StateRoot, Witness, Da, Vm, V> ParallelProverService<StateRoot, Witness, Da, Vm, V>
Expand Down Expand Up @@ -62,6 +64,7 @@ where
config: ProverGuestRunConfig,
zk_storage: V::PreState,
num_threads: usize,
ledger_db: LedgerDB,
) -> anyhow::Result<Self> {
let stf_verifier =
StateTransitionVerifier::<V, Da::Verifier, Vm::Guest>::new(zk_stf, da_verifier);
Expand Down Expand Up @@ -96,6 +99,7 @@ where
prover_config,
prover_state: Prover::new(num_threads)?,
zk_storage,
ledger_db,
})
}

Expand All @@ -106,6 +110,7 @@ where
da_verifier: Da::Verifier,
prover_config: ProverConfig,
zk_storage: V::PreState,
ledger_db: LedgerDB,
) -> anyhow::Result<Self> {
let num_cpus = num_cpus::get();
assert!(num_cpus > 1, "Unable to create parallel prover service");
Expand All @@ -117,6 +122,7 @@ where
prover_config.proving_mode,
zk_storage,
num_cpus - 1,
ledger_db,
)
}
}
Expand Down Expand Up @@ -192,6 +198,7 @@ where
.send_transaction(da_data)
.await
.map_err(|e| anyhow::anyhow!(e))?;
self.ledger_db.clear_pending_proving_sessions()?;
break Ok((tx_id, proof));
}
ProverStatus::ProvingInProgress => {
Expand All @@ -203,4 +210,27 @@ where
}
}
}

/// Recovers proving sessions from the VM host and submits each resulting
/// proof to the DA layer.
///
/// Every proof returned by `recover_proving_sessions` is wrapped in
/// `DaData::ZKProof` and sent through `da_service`, one at a time and in the
/// order the host returned them. After all of them have been sent, the
/// pending proving sessions recorded in the ledger DB are cleared.
///
/// # Returns
///
/// One `(transaction id, proof)` pair per recovered proof, in submission
/// order. The vector is empty when the host had nothing to recover.
///
/// # Errors
///
/// Propagates a failure from session recovery, from any DA submission, or
/// from clearing the pending sessions. A failed submission returns
/// immediately, so the pending sessions are *not* cleared in that case.
async fn recover_proving_sessions_and_send_to_da(
    &self,
    da_service: &Arc<Self::DaService>,
) -> Result<Vec<(<Da as DaService>::TransactionId, Proof)>, anyhow::Error> {
    tracing::info!("Checking if ongoing bonsai session exists");

    let recovered = self.vm.clone().recover_proving_sessions()?;

    let mut submitted = Vec::new();
    for proof in recovered {
        // The proof is cloned because the DA payload takes ownership while
        // the caller still gets the proof back alongside its transaction id.
        let tx_id = da_service
            .send_transaction(DaData::ZKProof(proof.clone()))
            .await
            .map_err(|e| anyhow::anyhow!(e))?;
        submitted.push((tx_id, proof));
    }

    // Only reached once every recovered proof has been submitted.
    self.ledger_db.clear_pending_proving_sessions()?;

    Ok(submitted)
}
}
Loading

0 comments on commit a680783

Please sign in to comment.