Skip to content

Commit

Permalink
also include start reason in logs
Browse files Browse the repository at this point in the history
this just seemed nice to do...
  • Loading branch information
hawkw committed Sep 5, 2024
1 parent 04e2740 commit ba41dda
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 24 deletions.
66 changes: 52 additions & 14 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ use propolis_client::support::InstanceSerialConsoleHelper;
use propolis_client::support::WSClientOffset;
use propolis_client::support::WebSocketStream;
use sagas::instance_common::ExternalIpAttach;
use sagas::instance_start;
use sagas::instance_update;
use sled_agent_client::types::InstanceMigrationTargetParams;
use sled_agent_client::types::InstanceProperties;
Expand Down Expand Up @@ -467,7 +468,7 @@ impl super::Nexus {
.instance_start(
opctx,
&lookup,
sagas::instance_start::Reason::AutoStart,
instance_start::Reason::AutoStart,
)
.await;
if let Err(e) = start_result {
Expand Down Expand Up @@ -644,7 +645,7 @@ impl super::Nexus {
self: &Arc<Self>,
opctx: &OpContext,
instance_lookup: &lookup::Instance<'_>,
reason: sagas::instance_start::Reason,
reason: instance_start::Reason,
) -> Result<InstanceAndActiveVmm, InstanceStateChangeError> {
let (.., authz_instance) =
instance_lookup.lookup_for(authz::Action::Modify).await?;
Expand All @@ -654,7 +655,7 @@ impl super::Nexus {
.instance_fetch_with_vmm(opctx, &authz_instance)
.await?;

match instance_start_allowed(&self.log, &state)? {
match instance_start_allowed(&self.log, &state, reason)? {
InstanceStartDisposition::AlreadyStarted => Ok(state),
InstanceStartDisposition::Start => {
let saga_params = sagas::instance_start::Params {
Expand Down Expand Up @@ -2047,6 +2048,7 @@ pub(crate) async fn process_vmm_update(
fn instance_start_allowed(
log: &slog::Logger,
state: &InstanceAndActiveVmm,
reason: instance_start::Reason,
) -> Result<InstanceStartDisposition, Error> {
let (instance, vmm) = (state.instance(), state.vmm());

Expand All @@ -2065,7 +2067,8 @@ fn instance_start_allowed(
| s @ InstanceState::Migrating => {
debug!(log, "asked to start an active instance";
"instance_id" => %instance.id(),
"state" => ?s);
"state" => ?s,
"start_reason" => ?reason);

Ok(InstanceStartDisposition::AlreadyStarted)
}
Expand All @@ -2078,7 +2081,8 @@ fn instance_start_allowed(
debug!(
log,
"instance's last VMM's start saga unwound, OK to start";
"instance_id" => %instance.id()
"instance_id" => %instance.id(),
"start_reason" => ?reason,
);

Ok(InstanceStartDisposition::Start)
Expand All @@ -2091,7 +2095,8 @@ fn instance_start_allowed(
"instance is {s:?} but still has an active VMM";
"instance_id" => %instance.id(),
"propolis_id" => %vmm.id,
"propolis_state" => ?vmm.runtime.state);
"propolis_state" => ?vmm.runtime.state,
"start_reason" => ?reason);

Err(Error::InternalError {
internal_message: format!(
Expand All @@ -2111,7 +2116,8 @@ fn instance_start_allowed(
debug!(log, "instance's VMM is still in the process of stopping";
"instance_id" => %instance.id(),
"propolis_id" => ?propolis_id,
"propolis_state" => ?propolis_state);
"propolis_state" => ?propolis_state,
"start_reason" => ?reason);
Err(Error::conflict(
"instance must finish stopping before it can be started",
))
Expand Down Expand Up @@ -2283,7 +2289,9 @@ mod tests {
let (mut instance, _vmm) = make_instance_and_vmm();
instance.runtime_state.nexus_state = DbInstanceState::NoVmm;
let state = InstanceAndActiveVmm::from((instance, None));
assert!(instance_start_allowed(&logctx.log, &state).is_ok());
assert!(
instance_start_allowed(&logctx.log, &state, Reason::User).is_ok()
);
logctx.cleanup_successful();
}

Expand All @@ -2297,7 +2305,12 @@ mod tests {
instance.runtime_state.propolis_id = Some(vmm.id);
vmm.runtime.state = DbVmmState::SagaUnwound;
let state = InstanceAndActiveVmm::from((instance, Some(vmm)));
assert!(instance_start_allowed(&logctx.log, &state).is_ok());
assert!(instance_start_allowed(
&logctx.log,
&state,
instance_start::Reason::User
)
.is_ok());
logctx.cleanup_successful();
}

Expand All @@ -2308,7 +2321,12 @@ mod tests {
let (mut instance, _vmm) = make_instance_and_vmm();
instance.runtime_state.nexus_state = DbInstanceState::Creating;
let state = InstanceAndActiveVmm::from((instance, None));
assert!(instance_start_allowed(&logctx.log, &state).is_err());
assert!(instance_start_allowed(
&logctx.log,
&state,
instance_start::Reason::User
)
.is_err());
logctx.cleanup_successful();
}

Expand All @@ -2321,21 +2339,41 @@ mod tests {
vmm.runtime.state = DbVmmState::Starting;
let state =
InstanceAndActiveVmm::from((instance.clone(), Some(vmm.clone())));
assert!(instance_start_allowed(&logctx.log, &state).is_ok());
assert!(instance_start_allowed(
&logctx.log,
&state,
instance_start::Reason::User
)
.is_ok());

vmm.runtime.state = DbVmmState::Running;
let state =
InstanceAndActiveVmm::from((instance.clone(), Some(vmm.clone())));
assert!(instance_start_allowed(&logctx.log, &state).is_ok());
assert!(instance_start_allowed(
&logctx.log,
&state,
instance_start::Reason::User
)
.is_ok());

vmm.runtime.state = DbVmmState::Rebooting;
let state =
InstanceAndActiveVmm::from((instance.clone(), Some(vmm.clone())));
assert!(instance_start_allowed(&logctx.log, &state).is_ok());
assert!(instance_start_allowed(
&logctx.log,
&state,
instance_start::Reason::User
)
.is_ok());

vmm.runtime.state = DbVmmState::Migrating;
let state = InstanceAndActiveVmm::from((instance, Some(vmm)));
assert!(instance_start_allowed(&logctx.log, &state).is_ok());
assert!(instance_start_allowed(
&logctx.log,
&state,
instance_start::Reason::User
)
.is_ok());
logctx.cleanup_successful();
}
}
37 changes: 27 additions & 10 deletions nexus/src/app/sagas/instance_start.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ async fn sis_destroy_vmm_record(
osagactx.log(),
"destroying vmm record for start saga unwind";
"propolis_id" => %propolis_id,
"start_reason" => ?params.reason,
);

osagactx.datastore().vmm_mark_saga_unwound(&opctx, &propolis_id).await?;
Expand All @@ -257,7 +258,8 @@ async fn sis_move_to_starting(
let propolis_id = sagactx.lookup::<PropolisUuid>("propolis_id")?;
info!(osagactx.log(), "moving instance to Starting state via saga";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id);
"propolis_id" => %propolis_id,
"start_reason" => ?params.reason);

let opctx = crate::context::op_context_for_saga_action(
&sagactx,
Expand Down Expand Up @@ -288,7 +290,8 @@ async fn sis_move_to_starting(
// proceed.
Some(vmm) if vmm.id == propolis_id.into_untyped_uuid() => {
info!(osagactx.log(), "start saga: Propolis ID already set";
"instance_id" => %instance_id);
"instance_id" => %instance_id,
"start_reason" => ?params.reason);

return Ok(db_instance.clone());
}
Expand Down Expand Up @@ -452,7 +455,8 @@ async fn sis_dpd_ensure(
let instance_id = InstanceUuid::from_untyped_uuid(db_instance.id());

info!(osagactx.log(), "start saga: ensuring instance dpd configuration";
"instance_id" => %instance_id);
"instance_id" => %instance_id,
"start_reason" => ?params.reason);

let opctx = crate::context::op_context_for_saga_action(
&sagactx,
Expand Down Expand Up @@ -491,7 +495,8 @@ async fn sis_dpd_ensure_undo(
);

info!(log, "start saga: undoing dpd configuration";
"instance_id" => %instance_id);
"instance_id" => %instance_id,
"start_reason" => ?params.reason);

let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore())
.instance_id(instance_id)
Expand Down Expand Up @@ -543,7 +548,8 @@ async fn sis_ensure_registered(

info!(osagactx.log(), "start saga: ensuring instance is registered on sled";
"instance_id" => %instance_id,
"sled_id" => %sled_id);
"sled_id" => %sled_id,
"start_reason" => ?params.reason);

let (authz_silo, authz_project, authz_instance) =
LookupPath::new(&opctx, &osagactx.datastore())
Expand Down Expand Up @@ -573,7 +579,8 @@ async fn sis_ensure_registered(
"start saga: sled agent failed to register instance";
"instance_id" => %instance_id,
"sled_id" => %sled_id,
"error" => ?inner);
"error" => ?inner,
"start_reason" => ?params.reason);

// Don't set the instance to Failed in this case. Instead, allow
// the saga to unwind and restore the instance to the Stopped
Expand All @@ -585,7 +592,8 @@ async fn sis_ensure_registered(
info!(osagactx.log(),
"start saga: internal error registering instance";
"instance_id" => %instance_id,
"error" => ?inner);
"error" => ?inner,
"start_reason" => ?params.reason);
ActionError::action_failed(inner)
}
})
Expand All @@ -609,7 +617,8 @@ async fn sis_ensure_registered_undo(
info!(osagactx.log(), "start saga: unregistering instance from sled";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"sled_id" => %sled_id);
"sled_id" => %sled_id,
"start_reason" => ?params.reason);

// Fetch the latest record so that this callee can drive the instance into
// a Failed state if the unregister call fails.
Expand All @@ -632,6 +641,7 @@ async fn sis_ensure_registered_undo(
error!(osagactx.log(),
"start saga: failed to unregister instance from sled";
"instance_id" => %instance_id,
"start_reason" => ?params.reason,
"error" => ?e);

// If the failure came from talking to sled agent, and the error code
Expand All @@ -658,6 +668,7 @@ async fn sis_ensure_registered_undo(
error!(osagactx.log(),
"start saga: failing instance after unregister failure";
"instance_id" => %instance_id,
"start_reason" => ?params.reason,
"error" => ?inner);

if let Err(set_failed_error) = osagactx
Expand All @@ -668,6 +679,7 @@ async fn sis_ensure_registered_undo(
error!(osagactx.log(),
"start saga: failed to mark instance as failed";
"instance_id" => %instance_id,
"start_reason" => ?params.reason,
"error" => ?set_failed_error);

Err(set_failed_error.into())
Expand All @@ -678,14 +690,16 @@ async fn sis_ensure_registered_undo(
InstanceStateChangeError::SledAgent(_) => {
info!(osagactx.log(),
"start saga: instance already unregistered from sled";
"instance_id" => %instance_id);
"instance_id" => %instance_id,
"start_reason" => ?params.reason);

Ok(())
}
InstanceStateChangeError::Other(inner) => {
error!(osagactx.log(),
"start saga: internal error unregistering instance";
"instance_id" => %instance_id,
"start_reason" => ?params.reason,
"error" => ?inner);

Err(inner.into())
Expand Down Expand Up @@ -713,7 +727,8 @@ async fn sis_ensure_running(
let sled_id = sagactx.lookup::<SledUuid>("sled_id")?;
info!(osagactx.log(), "start saga: ensuring instance is running";
"instance_id" => %instance_id,
"sled_id" => %sled_id);
"sled_id" => %sled_id,
"start_reason" => ?params.reason);

match osagactx
.nexus()
Expand All @@ -731,6 +746,7 @@ async fn sis_ensure_running(
"start saga: sled agent failed to set instance to running";
"instance_id" => %instance_id,
"sled_id" => %sled_id,
"start_reason" => ?params.reason,
"error" => ?inner);

// Don't set the instance to Failed in this case. Instead, allow
Expand All @@ -743,6 +759,7 @@ async fn sis_ensure_running(
info!(osagactx.log(),
"start saga: internal error changing instance state";
"instance_id" => %instance_id,
"start_reason" => ?params.reason,
"error" => ?inner);

Err(ActionError::action_failed(inner))
Expand Down

0 comments on commit ba41dda

Please sign in to comment.