Skip to content

Commit

Permalink
activate reincarnation when unwinding start sagas
Browse files Browse the repository at this point in the history
  • Loading branch information
hawkw committed Sep 27, 2024
1 parent 27564fc commit 19f9f16
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 69 deletions.
38 changes: 24 additions & 14 deletions dev-tools/omdb/src/bin/omdb/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2882,7 +2882,8 @@ async fn cmd_db_instance_info(
vmm::dsl as vmm_dsl,
};
use nexus_db_model::{
Instance, InstanceKarmicStatus, InstanceRuntimeState, Migration, Vmm,
Instance, InstanceKarmicStatus, InstanceRuntimeState, Migration,
Reincarnatability, Vmm,
};
let InstanceInfoArgs { id } = args;

Expand Down Expand Up @@ -2943,8 +2944,9 @@ async fn cmd_db_instance_info(
const STATE: &'static str = "nexus state";
const LAST_MODIFIED: &'static str = "last modified at";
const LAST_UPDATED: &'static str = "last updated at";
const LAST_AUTO_RESTART: &'static str = "last auto-restarted at";
const KARMIC_STATUS: &'static str = "karmic status";
const LAST_AUTO_RESTART: &'static str = " last reincarnated at";
const KARMIC_STATUS: &'static str = " karmic status";
const NEEDS_REINCARNATION: &'static str = "needs reincarnation";
const ACTIVE_VMM: &'static str = "active VMM ID";
const TARGET_VMM: &'static str = "target VMM ID";
const MIGRATION_ID: &'static str = "migration ID";
Expand All @@ -2968,6 +2970,7 @@ async fn cmd_db_instance_info(
LAST_MODIFIED,
LAST_AUTO_RESTART,
KARMIC_STATUS,
NEEDS_REINCARNATION,
ACTIVE_VMM,
TARGET_VMM,
MIGRATION_ID,
Expand Down Expand Up @@ -3026,25 +3029,32 @@ async fn cmd_db_instance_info(
" {LAST_UPDATED:>WIDTH$}: {time_updated:?} (generation {})",
r#gen.0
);
println!(" {LAST_AUTO_RESTART:>WIDTH$}: {time_last_auto_restarted:?}");
match instance
.auto_restart
.status(&instance.runtime_state, active_vmm.as_ref())
{
InstanceKarmicStatus::NotFailed => {}
InstanceKarmicStatus::Ready => {
println!("(i) {KARMIC_STATUS:>WIDTH$}: ready to reincarnate!");

// Reincarnation status
let InstanceKarmicStatus { needs_reincarnation, can_reincarnate } =
instance
.auto_restart
.status(&instance.runtime_state, active_vmm.as_ref());
println!(
"{} {NEEDS_REINCARNATION:>WIDTH$}: {needs_reincarnation}",
if needs_reincarnation { "(i)" } else { " " }
);
match can_reincarnate {
Reincarnatability::WillReincarnate => {
println!(" {KARMIC_STATUS:>WIDTH$}: bound to saṃsāra");
}
InstanceKarmicStatus::Forbidden => {
println!("(i) {KARMIC_STATUS:>WIDTH$}: reincarnation forbidden");
Reincarnatability::Nirvana => {
println!(" {KARMIC_STATUS:>WIDTH$}: attained nirvāṇa");
}
InstanceKarmicStatus::CoolingDown(remaining) => {
Reincarnatability::CoolingDown(remaining) => {
println!(
"/!\\ {KARMIC_STATUS:>WIDTH$}: cooling down \
({remaining:?} remaining)"
);
}
}
println!(" {LAST_AUTO_RESTART:>WIDTH$}: {time_last_auto_restarted:?}");

println!(" {ACTIVE_VMM:>WIDTH$}: {propolis_id:?}");
println!(" {TARGET_VMM:>WIDTH$}: {dst_propolis_id:?}");
println!(
Expand Down
76 changes: 54 additions & 22 deletions nexus/db-model/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use super::{
ByteCount, Disk, ExternalIp, Generation, InstanceAutoRestartPolicy,
InstanceCpuCount, InstanceState, Vmm,
InstanceCpuCount, InstanceState, Vmm, VmmState,
};
use crate::collection::DatastoreAttachTargetConfig;
use crate::schema::{disk, external_ip, instance};
Expand Down Expand Up @@ -266,18 +266,36 @@ pub struct InstanceAutoRestart {
pub cooldown: Option<TimeDelta>,
}

/// An instance's karmic status: whether it currently needs to be
/// reincarnated (automatically restarted), and whether its auto-restart
/// configuration would permit that.
///
/// This is the value returned by `InstanceAutoRestart::status`. The two
/// fields are computed independently of one another; use
/// `should_reincarnate` to ask whether both conditions hold at once.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct InstanceKarmicStatus {
/// Whether the instance is permitted to reincarnate if
/// `needs_reincarnation` is `true`.
///
/// This reflects only the auto-restart configuration (policy and
/// cooldown); it says nothing about whether the instance actually needs
/// reincarnation right now.
pub can_reincarnate: Reincarnatability,
/// `true` if the instance is in a state in which it could reincarnate if
/// `can_reincarnate` would permit it to do so.
///
/// `InstanceAutoRestart::status` sets this when the instance is in the
/// `Failed` state, or when its active VMM is in the `SagaUnwound` state.
pub needs_reincarnation: bool,
}

impl InstanceKarmicStatus {
    /// Reports whether the instance ought to be reincarnated right now.
    ///
    /// The answer is `true` only when both conditions hold: the instance is
    /// in a state that calls for reincarnation, and its karmic status is
    /// `Reincarnatability::WillReincarnate` (i.e. nothing forbids or delays
    /// the restart).
    pub fn should_reincarnate(&self) -> bool {
        // Nothing to do for an instance that does not need reincarnation,
        // regardless of what its auto-restart configuration would allow.
        if !self.needs_reincarnation {
            return false;
        }

        matches!(self.can_reincarnate, Reincarnatability::WillReincarnate)
    }
}

/// Describes whether or not an instance can reincarnate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum InstanceKarmicStatus {
/// The instance is ready to reincarnate.
Ready,
/// The instance does not need reincarnation, as it is not currently in the
/// `Failed` state.
NotFailed,
pub enum Reincarnatability {
/// The instance remains bound to the cycle of saṃsāra and can return in the
/// next life.
WillReincarnate,
/// The instance cannot reincarnate again until the specified time.
CoolingDown(TimeDelta),
/// The instance's auto-restart policy forbids it from reincarnating.
Forbidden,
/// The instance's auto-restart policy indicates that it has attained
/// nirvāṇa and will not reincarnate.
Nirvana,
}

impl InstanceAutoRestart {
Expand All @@ -292,44 +310,58 @@ impl InstanceAutoRestart {
pub const DEFAULT_POLICY: InstanceAutoRestartPolicy =
InstanceAutoRestartPolicy::BestEffort;

/// Returns `true` if `self` permits an instance to reincarnate given the
/// provided `state`.
/// Returns an instance's karmic status.
pub fn status(
&self,
state: &InstanceRuntimeState,
active_vmm: Option<&Vmm>,
) -> InstanceKarmicStatus {
// Instances only need to be automatically restarted if they are in the
// `Failed` state, or if their active VMM is in the `SagaUnwound` state.
match (state.nexus_state, active_vmm) {
let needs_reincarnation = match (state.nexus_state, active_vmm) {
(InstanceState::Failed, _vmm) => {
debug_assert!(
_vmm.is_none(),
"a Failed instance will never have an active VMM!"
);
true
}
(InstanceState::Vmm, Some(ref vmm)) => {
debug_assert_eq!(
state.propolis_id,
vmm.id(),
Some(vmm.id),
"don't call `InstanceAutoRestart::status with a VMM \
that isn't this instance's active VMM!?!?"
);
// Note that we *don't* reincarnate instances with `Failed`` active
// Note that we *don't* reincarnate instances with `Failed` active
// VMMs; in that case, an instance-update saga must first run to
// move the *instance* record to the `Failed` state.
if vmm.runtime.state != VmmState::SagaUnwound {
return InstanceKarmicStatus::NotFailed;
}
vmm.runtime.state == VmmState::SagaUnwound
}
_ => return InstanceKarmicStatus::NotFailed,
_ => false,
};

InstanceKarmicStatus {
needs_reincarnation,
can_reincarnate: self.can_reincarnate(&state),
}
}

/// Returns whether or not this auto-restart configuration will permit an
/// instance with the provided `InstanceRuntimeState` to reincarnate.
///
/// This does *not* indicate that the instance currently needs
/// reincarnation, but instead, whether the instance will be permitted to
/// reincarnate should it be in such a state.
pub fn can_reincarnate(
&self,
state: &InstanceRuntimeState,
) -> Reincarnatability {
// Check if the instance's configured auto-restart policy permits the
// control plane to automatically restart it.
let policy = self.policy.unwrap_or(Self::DEFAULT_POLICY);
if policy == InstanceAutoRestartPolicy::Never {
return InstanceKarmicStatus::Forbidden;
return Reincarnatability::Nirvana;
}

// If the instance is permitted to reincarnate, ensure that its last
Expand All @@ -341,15 +373,15 @@ impl InstanceAutoRestart {
let cooldown = self.cooldown.unwrap_or(Self::DEFAULT_COOLDOWN);
let time_since_last = Utc::now().signed_duration_since(last);
if time_since_last >= cooldown {
return InstanceKarmicStatus::Ready;
return Reincarnatability::WillReincarnate;
} else {
return InstanceKarmicStatus::CoolingDown(
return Reincarnatability::CoolingDown(
cooldown - time_since_last,
);
}
}

InstanceKarmicStatus::Ready
Reincarnatability::WillReincarnate
}

/// Filters a database query to include only instances whose auto-restart
Expand Down
32 changes: 32 additions & 0 deletions nexus/src/app/sagas/instance_start.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,15 +233,47 @@ async fn sis_destroy_vmm_record(
&params.serialized_authn,
);

let db_instance = params.db_instance;
let propolis_id = sagactx.lookup::<PropolisUuid>("propolis_id")?;
info!(
osagactx.log(),
"destroying vmm record for start saga unwind";
"instance_id" => %db_instance.id(),
"propolis_id" => %propolis_id,
"start_reason" => ?params.reason,
);

osagactx.datastore().vmm_mark_saga_unwound(&opctx, &propolis_id).await?;

// Now that the VMM record has been marked as `SagaUnwound`, the instance
// may be permitted to reincarnate. If it is, activate the instance
// reincarnation background task to help it along.
let karmic_status =
db_instance.auto_restart.can_reincarnate(db_instance.runtime());
if karmic_status == db::model::Reincarnatability::WillReincarnate {
info!(
osagactx.log(),
"start saga unwound; instance may reincarnate";
"instance_id" => %db_instance.id(),
"auto_restart_config" => ?db_instance.auto_restart,
"start_reason" => ?params.reason,
);
osagactx
.nexus()
.background_tasks
.task_instance_reincarnation
.activate();
} else {
debug!(
osagactx.log(),
"start saga unwound; but instance will not reincarnate";
"instance_id" => %db_instance.id(),
"auto_restart_config" => ?db_instance.auto_restart,
"start_reason" => ?params.reason,
"karmic_status" => ?karmic_status,
);
}

Ok(())
}

Expand Down
55 changes: 22 additions & 33 deletions nexus/src/app/sagas/instance_update/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,6 @@ use crate::app::db::datastore::VmmStateUpdateResult;
use crate::app::db::lookup::LookupPath;
use crate::app::db::model::ByteCount;
use crate::app::db::model::Generation;
use crate::app::db::model::InstanceKarmicStatus;
use crate::app::db::model::InstanceRuntimeState;
use crate::app::db::model::InstanceState;
use crate::app::db::model::MigrationState;
Expand Down Expand Up @@ -1288,40 +1287,30 @@ async fn siu_chain_successor_saga(
// auto-restart policy allows it to be automatically restarted. If
// it does, activate the instance-reincarnation background task to
// automatically restart it.
let auto_restart = new_state.instance.auto_restart;
match auto_restart.status(
let karmic_state = new_state.instance.auto_restart.status(
&new_state.instance.runtime_state,
new_state.active_vmm.as_ref(),
) {
InstanceKarmicStatus::Ready => {
info!(
log,
"instance update: instance transitioned to Failed, \
but can be automatically restarted; activating \
reincarnation.";
"instance_id" => %instance_id,
"auto_restart" => ?auto_restart,
"runtime_state" => ?new_state.instance.runtime_state,
);
nexus
.background_tasks
.task_instance_reincarnation
.activate();
}
InstanceKarmicStatus::CoolingDown(remaining) => {
info!(
log,
"instance update: instance transitioned to Failed, \
but is still in cooldown from a previous \
reincarnation";
"instance_id" => %instance_id,
"auto_restart" => ?auto_restart,
"cooldown_remaining" => ?remaining,
"runtime_state" => ?new_state.instance.runtime_state,
);
}
InstanceKarmicStatus::Forbidden
| InstanceKarmicStatus::NotFailed => {}
);
if karmic_state.should_reincarnate() {
info!(
log,
"instance update: instance transitioned to Failed, \
but can be automatically restarted; activating \
reincarnation.";
"instance_id" => %instance_id,
"auto_restart_config" => ?new_state.instance.auto_restart,
"runtime_state" => ?new_state.instance.runtime_state,
);
nexus.background_tasks.task_instance_reincarnation.activate();
} else {
debug!(
log,
"instance update: instance will not reincarnate";
"instance_id" => %instance_id,
"auto_restart_config" => ?new_state.instance.auto_restart,
"needs_reincarnation" => karmic_state.needs_reincarnation,
"karmic_state" => ?karmic_state.can_reincarnate,
)
}
}

Expand Down

0 comments on commit 19f9f16

Please sign in to comment.