From 27564fc055080ac8783ae67b924c62c8888c5b6a Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Fri, 27 Sep 2024 12:18:46 -0700 Subject: [PATCH] include active VMM in karmic status check --- dev-tools/omdb/src/bin/omdb/db.rs | 5 ++- nexus/db-model/src/instance.rs | 36 ++++++++++++++++++---- nexus/src/app/sagas/instance_update/mod.rs | 5 ++- 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 12eec3f783..4a67e5771e 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -3027,7 +3027,10 @@ async fn cmd_db_instance_info( r#gen.0 ); println!(" {LAST_AUTO_RESTART:>WIDTH$}: {time_last_auto_restarted:?}"); - match instance.auto_restart.status(&instance.runtime_state) { + match instance + .auto_restart + .status(&instance.runtime_state, active_vmm.as_ref()) + { InstanceKarmicStatus::NotFailed => {} InstanceKarmicStatus::Ready => { println!("(i) {KARMIC_STATUS:>WIDTH$}: ready to reincarnate!"); diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index 4dbf80fea1..a753e81ae8 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -4,7 +4,7 @@ use super::{ ByteCount, Disk, ExternalIp, Generation, InstanceAutoRestartPolicy, - InstanceCpuCount, InstanceState, + InstanceCpuCount, InstanceState, Vmm, }; use crate::collection::DatastoreAttachTargetConfig; use crate::schema::{disk, external_ip, instance}; @@ -294,12 +294,36 @@ impl InstanceAutoRestart { /// Returns `true` if `self` permits an instance to reincarnate given the /// provided `state`. - pub fn status(&self, state: &InstanceRuntimeState) -> InstanceKarmicStatus { + pub fn status( + &self, + state: &InstanceRuntimeState, + active_vmm: Option<&Vmm>, + ) -> InstanceKarmicStatus { // Instances only need to be automatically restarted if they are in the - // `Failed` state. 
- if state.nexus_state != InstanceState::Failed { - return InstanceKarmicStatus::NotFailed; - } + // `Failed` state, or if their active VMM is in the `SagaUnwound` state. + match (state.nexus_state, active_vmm) { + (InstanceState::Failed, _vmm) => { + debug_assert!( + _vmm.is_none(), + "a Failed instance will never have an active VMM!" + ); + } + (InstanceState::Vmm, Some(ref vmm)) => { + debug_assert_eq!( + state.propolis_id, + vmm.id(), + "don't call `InstanceAutoRestart::status with a VMM \ + that isn't this instance's active VMM!?!?" + ); + // Note that we *don't* reincarnate instances with `Failed` active + // VMMs; in that case, an instance-update saga must first run to + // move the *instance* record to the `Failed` state. + if vmm.runtime.state != VmmState::SagaUnwound { + return InstanceKarmicStatus::NotFailed; + } + } + _ => return InstanceKarmicStatus::NotFailed, + }; // Check if the instance's configured auto-restart policy permits the // control plane to automatically restart it. diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index 9d2fc58512..e55bf33113 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -1289,7 +1289,10 @@ async fn siu_chain_successor_saga( // it does, activate the instance-reincarnation background task to // automatically restart it. let auto_restart = new_state.instance.auto_restart; - match auto_restart.status(&new_state.instance.runtime_state) { + match auto_restart.status( + &new_state.instance.runtime_state, + new_state.active_vmm.as_ref(), + ) { InstanceKarmicStatus::Ready => { info!( log,