[reconfigurator] a couple of type improvements (#6395)
* Use a typestate for the physical disk dependency. Just want to make
sure the ordering is right once this moves to being expressed via the
update engine.
* Add strong typing around the output of `realize_blueprint`.
sunshowers authored Aug 21, 2024
1 parent 2e0025c commit ab29a02
Showing 5 changed files with 156 additions and 92 deletions.
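To make the first bullet of the commit message concrete, here is a minimal, self-contained sketch of the typestate dependency it describes: deploy_disks returns a marker value that decommission_expunged_disks must consume, so the compiler itself enforces that decommissioning only happens after deployment. Everything below is illustrative scaffolding with simplified error types; the real functions live in nexus/reconfigurator/execution, are async, and take many more parameters, as the diff that follows shows.

/// Typestate marker: proof that the deploy-disks step has already run.
#[derive(Debug)]
#[must_use = "pass this into decommission_expunged_disks"]
pub struct DeployDisksDone {}

/// Stand-in for the real deploy step; on success it hands back the marker.
fn deploy_disks() -> Result<DeployDisksDone, Vec<String>> {
    // ... push the blueprint's disk configs to each sled agent ...
    Ok(DeployDisksDone {})
}

/// Requiring the marker as a parameter means any call ordering that tries to
/// decommission disks before deploying them fails to compile.
fn decommission_expunged_disks(
    _done: DeployDisksDone,
) -> Result<(), Vec<String>> {
    // ... mark expunged disks as decommissioned in the datastore ...
    Ok(())
}

fn main() -> Result<(), Vec<String>> {
    let done = deploy_disks()?;
    decommission_expunged_disks(done)?;
    Ok(())
}
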
22 changes: 12 additions & 10 deletions nexus/reconfigurator/execution/src/cockroachdb.rs
@@ -34,6 +34,7 @@ pub(crate) async fn ensure_settings(
mod test {
use super::*;
use crate::overridables::Overridables;
use crate::RealizeBlueprintOutput;
use nexus_db_queries::authn;
use nexus_db_queries::authz;
use nexus_test_utils_macros::nexus_test;
@@ -97,16 +98,17 @@ mod test {
.await;
// Execute the initial blueprint.
let overrides = Overridables::for_test(cptestctx);
crate::realize_blueprint_with_overrides(
&opctx,
datastore,
resolver,
&blueprint,
Uuid::new_v4(),
&overrides,
)
.await
.expect("failed to execute initial blueprint");
let _: RealizeBlueprintOutput =
crate::realize_blueprint_with_overrides(
&opctx,
datastore,
resolver,
&blueprint,
Uuid::new_v4(),
&overrides,
)
.await
.expect("failed to execute initial blueprint");
// The CockroachDB settings should not have changed.
assert_eq!(
settings,
106 changes: 56 additions & 50 deletions nexus/reconfigurator/execution/src/dns.rs
@@ -458,6 +458,7 @@ pub fn blueprint_nexus_external_ips(blueprint: &Blueprint) -> Vec<IpAddr> {
mod test {
use super::*;
use crate::overridables::Overridables;
use crate::RealizeBlueprintOutput;
use crate::Sled;
use dns_service_client::DnsDiff;
use internal_dns::config::Host;
@@ -1245,16 +1246,17 @@ mod test {

// Now, execute the initial blueprint.
let overrides = Overridables::for_test(cptestctx);
crate::realize_blueprint_with_overrides(
&opctx,
datastore,
resolver,
&blueprint,
Uuid::new_v4(),
&overrides,
)
.await
.expect("failed to execute initial blueprint");
let _: RealizeBlueprintOutput =
crate::realize_blueprint_with_overrides(
&opctx,
datastore,
resolver,
&blueprint,
Uuid::new_v4(),
&overrides,
)
.await
.expect("failed to execute initial blueprint");

// DNS ought not to have changed.
verify_dns_unchanged(
@@ -1385,16 +1387,17 @@ mod test {
.await
.expect("failed to set blueprint as target");

crate::realize_blueprint_with_overrides(
&opctx,
datastore,
resolver,
&blueprint2,
Uuid::new_v4(),
&overrides,
)
.await
.expect("failed to execute second blueprint");
let _: RealizeBlueprintOutput =
crate::realize_blueprint_with_overrides(
&opctx,
datastore,
resolver,
&blueprint2,
Uuid::new_v4(),
&overrides,
)
.await
.expect("failed to execute second blueprint");

// Now fetch DNS again. Both should have changed this time.
let dns_latest_internal = datastore
@@ -1459,16 +1462,17 @@ mod test {
}

// If we execute it again, we should see no more changes.
crate::realize_blueprint_with_overrides(
&opctx,
datastore,
resolver,
&blueprint2,
Uuid::new_v4(),
&overrides,
)
.await
.expect("failed to execute second blueprint again");
let _: RealizeBlueprintOutput =
crate::realize_blueprint_with_overrides(
&opctx,
datastore,
resolver,
&blueprint2,
Uuid::new_v4(),
&overrides,
)
.await
.expect("failed to execute second blueprint again");
verify_dns_unchanged(
&opctx,
datastore,
@@ -1495,16 +1499,17 @@ mod test {

// One more time, make sure that executing the blueprint does not do
// anything.
crate::realize_blueprint_with_overrides(
&opctx,
datastore,
resolver,
&blueprint2,
Uuid::new_v4(),
&overrides,
)
.await
.expect("failed to execute second blueprint again");
let _: RealizeBlueprintOutput =
crate::realize_blueprint_with_overrides(
&opctx,
datastore,
resolver,
&blueprint2,
Uuid::new_v4(),
&overrides,
)
.await
.expect("failed to execute second blueprint again");
verify_dns_unchanged(
&opctx,
datastore,
@@ -1589,16 +1594,17 @@ mod test {
);

// If we execute the blueprint, DNS should not be changed.
crate::realize_blueprint_with_overrides(
&opctx,
datastore,
resolver,
&blueprint,
Uuid::new_v4(),
&overrides,
)
.await
.expect("failed to execute blueprint");
let _: RealizeBlueprintOutput =
crate::realize_blueprint_with_overrides(
&opctx,
datastore,
resolver,
&blueprint,
Uuid::new_v4(),
&overrides,
)
.await
.expect("failed to execute blueprint");
let dns_latest_internal = datastore
.dns_config_read(&opctx, DnsGroup::Internal)
.await
28 changes: 19 additions & 9 deletions nexus/reconfigurator/execution/src/lib.rs
@@ -70,6 +70,15 @@ impl From<nexus_db_model::Sled> for Sled {
}
}

/// The result of calling [`realize_blueprint`] or
/// [`realize_blueprint_with_overrides`].
#[derive(Debug)]
#[must_use = "the output of realize_blueprint should probably be used"]
pub struct RealizeBlueprintOutput {
/// Whether any sagas need to be reassigned to a new Nexus.
pub needs_saga_recovery: bool,
}

/// Make one attempt to realize the given blueprint, meaning to take actions to
/// alter the real system to match the blueprint
///
@@ -81,7 +90,7 @@ pub async fn realize_blueprint(
resolver: &Resolver,
blueprint: &Blueprint,
nexus_id: Uuid,
) -> Result<bool, Vec<anyhow::Error>> {
) -> Result<RealizeBlueprintOutput, Vec<anyhow::Error>> {
realize_blueprint_with_overrides(
opctx,
datastore,
@@ -100,7 +109,7 @@ pub async fn realize_blueprint_with_overrides(
blueprint: &Blueprint,
nexus_id: Uuid,
overrides: &Overridables,
) -> Result<bool, Vec<anyhow::Error>> {
) -> Result<RealizeBlueprintOutput, Vec<anyhow::Error>> {
let opctx = opctx.child(BTreeMap::from([(
"comment".to_string(),
blueprint.comment.clone(),
@@ -132,7 +141,7 @@ pub async fn realize_blueprint_with_overrides(
})
.collect();

omicron_physical_disks::deploy_disks(
let deploy_disks_done = omicron_physical_disks::deploy_disks(
&opctx,
&sleds_by_id,
&blueprint.blueprint_disks,
@@ -205,11 +214,12 @@ pub async fn realize_blueprint_with_overrides(
)
.await?;

// This depends on the "deploy_disks" call earlier -- disk expungement is a
// statement of policy, but we need to be assured that the Sled Agent has
// stopped using that disk before we can mark its state as decommissioned.
omicron_physical_disks::decommission_expunged_disks(&opctx, datastore)
.await?;
omicron_physical_disks::decommission_expunged_disks(
&opctx,
datastore,
deploy_disks_done,
)
.await?;

// From this point on, we'll assume that any errors that we encounter do
// *not* require stopping execution. We'll just accumulate them and return
@@ -244,7 +254,7 @@ pub async fn realize_blueprint_with_overrides(
}

if errors.is_empty() {
Ok(needs_saga_recovery)
Ok(RealizeBlueprintOutput { needs_saga_recovery })
} else {
Err(errors)
}
51 changes: 38 additions & 13 deletions nexus/reconfigurator/execution/src/omicron_physical_disks.rs
@@ -25,7 +25,7 @@ pub(crate) async fn deploy_disks(
opctx: &OpContext,
sleds_by_id: &BTreeMap<SledUuid, Sled>,
sled_configs: &BTreeMap<SledUuid, BlueprintPhysicalDisksConfig>,
) -> Result<(), Vec<anyhow::Error>> {
) -> Result<DeployDisksDone, Vec<anyhow::Error>> {
let errors: Vec<_> = stream::iter(sled_configs)
.filter_map(|(sled_id, config)| async move {
let log = opctx.log.new(o!(
@@ -92,16 +92,26 @@
.await;

if errors.is_empty() {
Ok(())
Ok(DeployDisksDone {})
} else {
Err(errors)
}
}

/// Decommissions all disks which are currently expunged
/// Typestate indicating that the deploy disks step was performed.
#[derive(Debug)]
#[must_use = "this should be passed into decommission_expunged_disks"]
pub(crate) struct DeployDisksDone {}

/// Decommissions all disks which are currently expunged.
pub(crate) async fn decommission_expunged_disks(
opctx: &OpContext,
datastore: &DataStore,
// This is taken as a parameter to ensure that this depends on a
// "deploy_disks" call made earlier. Disk expungement is a statement of
// policy, but we need to be assured that the Sled Agent has stopped using
// that disk before we can mark its state as decommissioned.
_deploy_disks_done: DeployDisksDone,
) -> Result<(), Vec<anyhow::Error>> {
datastore
.physical_disk_decommission_all_expunged(&opctx)
@@ -113,6 +123,7 @@ pub(crate) async fn decommission_expunged_disks(
#[cfg(test)]
mod test {
use super::deploy_disks;
use super::DeployDisksDone;

use crate::DataStore;
use crate::Sled;
@@ -217,9 +228,13 @@ mod test {
// Get a success result back when the blueprint has an empty set of
// disks.
let (_, blueprint) = create_blueprint(BTreeMap::new());
deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks)
.await
.expect("failed to deploy no disks");
// Use an explicit type here because not doing so can cause errors to
// be ignored (this behavior is genuinely terrible). Instead, ensure
// that the type has the right result.
let _: DeployDisksDone =
deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks)
.await
.expect("failed to deploy no disks");

// Disks are updated in a particular order, but each request contains
// the full set of disks that must be running.
@@ -272,9 +287,10 @@ }
}

// Execute it.
deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks)
.await
.expect("failed to deploy initial disks");
let _: DeployDisksDone =
deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks)
.await
.expect("failed to deploy initial disks");

s1.verify_and_clear();
s2.verify_and_clear();
@@ -293,9 +309,10 @@
)),
);
}
deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks)
.await
.expect("failed to deploy same disks");
let _: DeployDisksDone =
deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks)
.await
.expect("failed to deploy same disks");
s1.verify_and_clear();
s2.verify_and_clear();

@@ -567,7 +584,15 @@ mod test {
assert_eq!(d.disk_state, PhysicalDiskState::Active);
assert_eq!(d.disk_policy, PhysicalDiskPolicy::InService);

super::decommission_expunged_disks(&opctx, &datastore).await.unwrap();
super::decommission_expunged_disks(
&opctx,
&datastore,
// This is an internal test, and we're testing decommissioning in
// isolation, so it's okay to create the typestate here.
DeployDisksDone {},
)
.await
.unwrap();

// After decommissioning, we see the expunged disk become
// decommissioned. The other disk remains in-service.
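
For the second bullet of the commit message (the strongly typed result), here is a hedged, self-contained sketch of how a caller consumes RealizeBlueprintOutput. Only the struct mirrors the diff above; the stub function, its error type, and main are invented for illustration, while the real realize_blueprint is async and takes the operation context, datastore, resolver, blueprint, and Nexus ID.

#[derive(Debug)]
#[must_use = "the output of realize_blueprint should probably be used"]
pub struct RealizeBlueprintOutput {
    /// Whether any sagas need to be reassigned to a new Nexus.
    pub needs_saga_recovery: bool,
}

/// Stand-in for realize_blueprint; the real function is async but fallible
/// in the same shape (a struct on success, a Vec of errors on failure).
fn realize_blueprint_stub() -> Result<RealizeBlueprintOutput, Vec<String>> {
    Ok(RealizeBlueprintOutput { needs_saga_recovery: false })
}

fn main() {
    // Binding with an explicit type, as the updated tests do, guarantees the
    // call still returns the named struct rather than a bare bool; dropping
    // the value entirely would trigger the must_use warning.
    let output: RealizeBlueprintOutput =
        realize_blueprint_stub().expect("stub never fails");
    if output.needs_saga_recovery {
        println!("caller would activate saga recovery here");
    }
}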