diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 4a67e5771e..5b81850b5b 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -2882,7 +2882,8 @@ async fn cmd_db_instance_info( vmm::dsl as vmm_dsl, }; use nexus_db_model::{ - Instance, InstanceKarmicStatus, InstanceRuntimeState, Migration, Vmm, + Instance, InstanceKarmicStatus, InstanceRuntimeState, Migration, + Reincarnatability, Vmm, }; let InstanceInfoArgs { id } = args; @@ -2943,8 +2944,9 @@ async fn cmd_db_instance_info( const STATE: &'static str = "nexus state"; const LAST_MODIFIED: &'static str = "last modified at"; const LAST_UPDATED: &'static str = "last updated at"; - const LAST_AUTO_RESTART: &'static str = "last auto-restarted at"; - const KARMIC_STATUS: &'static str = "karmic status"; + const LAST_AUTO_RESTART: &'static str = " last reincarnated at"; + const KARMIC_STATUS: &'static str = " karmic status"; + const NEEDS_REINCARNATION: &'static str = "needs reincarnation"; const ACTIVE_VMM: &'static str = "active VMM ID"; const TARGET_VMM: &'static str = "target VMM ID"; const MIGRATION_ID: &'static str = "migration ID"; @@ -2968,6 +2970,7 @@ async fn cmd_db_instance_info( LAST_MODIFIED, LAST_AUTO_RESTART, KARMIC_STATUS, + NEEDS_REINCARNATION, ACTIVE_VMM, TARGET_VMM, MIGRATION_ID, @@ -3026,25 +3029,32 @@ async fn cmd_db_instance_info( " {LAST_UPDATED:>WIDTH$}: {time_updated:?} (generation {})", r#gen.0 ); - println!(" {LAST_AUTO_RESTART:>WIDTH$}: {time_last_auto_restarted:?}"); - match instance - .auto_restart - .status(&instance.runtime_state, active_vmm.as_ref()) - { - InstanceKarmicStatus::NotFailed => {} - InstanceKarmicStatus::Ready => { - println!("(i) {KARMIC_STATUS:>WIDTH$}: ready to reincarnate!"); + + // Reincarnation status + let InstanceKarmicStatus { needs_reincarnation, can_reincarnate } = + instance + .auto_restart + .status(&instance.runtime_state, active_vmm.as_ref()); + println!( + "{} 
{NEEDS_REINCARNATION:>WIDTH$}: {needs_reincarnation}", + if needs_reincarnation { "(i)" } else { " " } + ); + match can_reincarnate { + Reincarnatability::WillReincarnate => { + println!(" {KARMIC_STATUS:>WIDTH$}: bound to saṃsāra"); } - InstanceKarmicStatus::Forbidden => { - println!("(i) {KARMIC_STATUS:>WIDTH$}: reincarnation forbidden"); + Reincarnatability::Nirvana => { + println!(" {KARMIC_STATUS:>WIDTH$}: attained nirvāṇa"); } - InstanceKarmicStatus::CoolingDown(remaining) => { + Reincarnatability::CoolingDown(remaining) => { println!( "/!\\ {KARMIC_STATUS:>WIDTH$}: cooling down \ ({remaining:?} remaining)" ); } } + println!(" {LAST_AUTO_RESTART:>WIDTH$}: {time_last_auto_restarted:?}"); + println!(" {ACTIVE_VMM:>WIDTH$}: {propolis_id:?}"); println!(" {TARGET_VMM:>WIDTH$}: {dst_propolis_id:?}"); println!( diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index a753e81ae8..f57a70aa80 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -4,7 +4,7 @@ use super::{ ByteCount, Disk, ExternalIp, Generation, InstanceAutoRestartPolicy, - InstanceCpuCount, InstanceState, Vmm, + InstanceCpuCount, InstanceState, Vmm, VmmState, }; use crate::collection::DatastoreAttachTargetConfig; use crate::schema::{disk, external_ip, instance}; @@ -266,18 +266,36 @@ pub struct InstanceAutoRestart { pub cooldown: Option<TimeDelta>, } +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct InstanceKarmicStatus { + /// Whether the instance is permitted to reincarnate if + /// `needs_reincarnation` is `true`. + pub can_reincarnate: Reincarnatability, + /// `true` if the instance is in a state in which it could reincarnate if + /// `can_reincarnate` would permit it to do so. + pub needs_reincarnation: bool, +} + +impl InstanceKarmicStatus { + /// Returns `true` if this instance is in a state that requires + /// reincarnation, and is permitted to reincarnate immediately. 
+ pub fn should_reincarnate(&self) -> bool { + self.needs_reincarnation + && self.can_reincarnate == Reincarnatability::WillReincarnate + } +} + /// Describes whether or not an instance can reincarnate. #[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum InstanceKarmicStatus { - /// The instance is ready to reincarnate. - Ready, - /// The instance does not need reincarnation, as it is not currently in the - /// `Failed` state. - NotFailed, +pub enum Reincarnatability { + /// The instance remains bound to the cycle of saṃsāra and can return in the + /// next life. + WillReincarnate, /// The instance cannot reincarnate again until the specified time. CoolingDown(TimeDelta), - /// The instance's auto-restart policy forbids it from reincarnating. - Forbidden, + /// The instance's auto-restart policy indicates that it has attained + /// nirvāṇa and will not reincarnate. + Nirvana, } impl InstanceAutoRestart { @@ -292,8 +310,7 @@ impl InstanceAutoRestart { pub const DEFAULT_POLICY: InstanceAutoRestartPolicy = InstanceAutoRestartPolicy::BestEffort; - /// Returns `true` if `self` permits an instance to reincarnate given the - /// provided `state`. + /// Returns an instance's karmic status. pub fn status( &self, state: &InstanceRuntimeState, @@ -301,35 +318,50 @@ impl InstanceAutoRestart { ) -> InstanceKarmicStatus { // Instances only need to be automatically restarted if they are in the // `Failed` state, or if their active VMM is in the `SagaUnwound` state. - match (state.nexus_state, active_vmm) { + let needs_reincarnation = match (state.nexus_state, active_vmm) { (InstanceState::Failed, _vmm) => { debug_assert!( _vmm.is_none(), "a Failed instance will never have an active VMM!" ); + true } (InstanceState::Vmm, Some(ref vmm)) => { debug_assert_eq!( state.propolis_id, - vmm.id(), + Some(vmm.id), "don't call `InstanceAutoRestart::status with a VMM \ that isn't this instance's active VMM!?!?" 
); - // Note that we *don't* reincarnate instances with `Failed`` active + // Note that we *don't* reincarnate instances with `Failed` active // VMMs; in that case, an instance-update saga must first run to // move the *instance* record to the `Failed` state. - if vmm.runtime.state != VmmState::SagaUnwound { - return InstanceKarmicStatus::NotFailed; - } + vmm.runtime.state == VmmState::SagaUnwound } - _ => return InstanceKarmicStatus::NotFailed, + _ => false, }; + InstanceKarmicStatus { + needs_reincarnation, + can_reincarnate: self.can_reincarnate(&state), + } + } + + /// Returns whether or not this auto-restart configuration will permit an + /// instance with the provided `InstanceRuntimeState` to reincarnate. + /// + /// This does *not* indicate that the instance currently needs + /// reincarnation, but instead, whether the instance will be permitted to + /// reincarnate should it be in such a state. + pub fn can_reincarnate( + &self, + state: &InstanceRuntimeState, + ) -> Reincarnatability { // Check if the instance's configured auto-restart policy permits the // control plane to automatically restart it. 
let policy = self.policy.unwrap_or(Self::DEFAULT_POLICY); if policy == InstanceAutoRestartPolicy::Never { - return InstanceKarmicStatus::Forbidden; + return Reincarnatability::Nirvana; } // If the instance is permitted to reincarnate, ensure that its last @@ -341,15 +373,15 @@ impl InstanceAutoRestart { let cooldown = self.cooldown.unwrap_or(Self::DEFAULT_COOLDOWN); let time_since_last = Utc::now().signed_duration_since(last); if time_since_last >= cooldown { - return InstanceKarmicStatus::Ready; + return Reincarnatability::WillReincarnate; } else { - return InstanceKarmicStatus::CoolingDown( + return Reincarnatability::CoolingDown( cooldown - time_since_last, ); } } - InstanceKarmicStatus::Ready + Reincarnatability::WillReincarnate } /// Filters a database query to include only instances whose auto-restart diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index ac3bce46c5..b92533cd68 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -233,15 +233,47 @@ async fn sis_destroy_vmm_record( &params.serialized_authn, ); + let db_instance = params.db_instance; let propolis_id = sagactx.lookup::("propolis_id")?; info!( osagactx.log(), "destroying vmm record for start saga unwind"; + "instance_id" => %db_instance.id(), "propolis_id" => %propolis_id, "start_reason" => ?params.reason, ); osagactx.datastore().vmm_mark_saga_unwound(&opctx, &propolis_id).await?; + + // Now that the VMM record has been marked as `SagaUnwound`, the instance + // may be permitted to reincarnate. If it is, activate the instance + // reincarnation background task to help it along. 
+ let karmic_status = + db_instance.auto_restart.can_reincarnate(db_instance.runtime()); + if karmic_status == db::model::Reincarnatability::WillReincarnate { + info!( + osagactx.log(), + "start saga unwound; instance may reincarnate"; + "instance_id" => %db_instance.id(), + "auto_restart_config" => ?db_instance.auto_restart, + "start_reason" => ?params.reason, + ); + osagactx + .nexus() + .background_tasks + .task_instance_reincarnation + .activate(); + } else { + debug!( + osagactx.log(), + "start saga unwound; but instance will not reincarnate"; + "instance_id" => %db_instance.id(), + "auto_restart_config" => ?db_instance.auto_restart, + "start_reason" => ?params.reason, + "karmic_status" => ?karmic_status, + ); + } + Ok(()) } diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index e55bf33113..f0610c04c8 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -350,7 +350,6 @@ use crate::app::db::datastore::VmmStateUpdateResult; use crate::app::db::lookup::LookupPath; use crate::app::db::model::ByteCount; use crate::app::db::model::Generation; -use crate::app::db::model::InstanceKarmicStatus; use crate::app::db::model::InstanceRuntimeState; use crate::app::db::model::InstanceState; use crate::app::db::model::MigrationState; @@ -1288,40 +1287,30 @@ async fn siu_chain_successor_saga( // auto-restart policy allows it to be automatically restarted. If // it does, activate the instance-reincarnation background task to // automatically restart it. 
- let auto_restart = new_state.instance.auto_restart; - match auto_restart.status( + let karmic_state = new_state.instance.auto_restart.status( &new_state.instance.runtime_state, new_state.active_vmm.as_ref(), - ) { - InstanceKarmicStatus::Ready => { - info!( - log, - "instance update: instance transitioned to Failed, \ - but can be automatically restarted; activating \ - reincarnation."; - "instance_id" => %instance_id, - "auto_restart" => ?auto_restart, - "runtime_state" => ?new_state.instance.runtime_state, - ); - nexus - .background_tasks - .task_instance_reincarnation - .activate(); - } - InstanceKarmicStatus::CoolingDown(remaining) => { - info!( - log, - "instance update: instance transitioned to Failed, \ - but is still in cooldown from a previous \ - reincarnation"; - "instance_id" => %instance_id, - "auto_restart" => ?auto_restart, - "cooldown_remaining" => ?remaining, - "runtime_state" => ?new_state.instance.runtime_state, - ); - } - InstanceKarmicStatus::Forbidden - | InstanceKarmicStatus::NotFailed => {} + ); + if karmic_state.should_reincarnate() { + info!( + log, + "instance update: instance transitioned to Failed, \ + but can be automatically restarted; activating \ + reincarnation."; + "instance_id" => %instance_id, + "auto_restart_config" => ?new_state.instance.auto_restart, + "runtime_state" => ?new_state.instance.runtime_state, + ); + nexus.background_tasks.task_instance_reincarnation.activate(); + } else { + debug!( + log, + "instance update: instance will not reincarnate"; + "instance_id" => %instance_id, + "auto_restart_config" => ?new_state.instance.auto_restart, + "needs_reincarnation" => karmic_state.needs_reincarnation, + "karmic_state" => ?karmic_state.can_reincarnate, + ) } }