From feff3c5c76147ffc319471c41b8bf7d60df3d56e Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 1 Oct 2024 14:40:52 -0700 Subject: [PATCH] [nexus] allow reconfiguring auto-restart policies This commit extends the `instance-reconfigure` API endpoint added in #6585 to also allow setting instance auto-restart policies (as added in #6503). I've also added the actual auto-restart policy to the external API instance view, along with the boolean `auto_restart_enabled` added in #6503. This way, it's possible to change just the boot disk by providing the current auto-restart policy in an instance POST. --- common/src/api/external/mod.rs | 14 ++++++++++ nexus/db-queries/src/db/datastore/instance.rs | 21 +++++++++----- nexus/src/app/instance.rs | 4 ++- nexus/tests/integration_tests/endpoints.rs | 5 +++- nexus/tests/integration_tests/instances.rs | 18 ++++++++++-- openapi/nexus-internal.json | 28 +++++++++++++++++++ openapi/nexus.json | 18 ++++++++++++ 7 files changed, 96 insertions(+), 12 deletions(-) diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index a34f5b71ac7..9ea8cfb6c77 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -1215,6 +1215,20 @@ pub struct InstanceAutoRestartStatus { #[serde(rename = "auto_restart_enabled")] pub enabled: bool, + /// The auto-restart policy configured for this instance, or `None` if no + /// explicit policy is configured. + /// + /// If this is not present, then this instance uses the default auto-restart + /// policy, which may or may not allow it to be restarted. The + /// `auto_restart_enabled` field indicates whether the instance will be + /// automatically restarted. + // + // Rename this field, as the struct is `#[serde(flatten)]`ed into the + // `Instance` type, and we would like the field to be prefixed with + // `auto_restart`. + #[serde(rename = "auto_restart_policy")] + pub policy: Option, + /// The time at which the auto-restart cooldown period for this instance /// completes, permitting it to be automatically restarted again. If the /// instance enters the `Failed` state, it will not be restarted until after diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index e89cd8f234d..28644b2be88 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -223,16 +223,23 @@ impl From for external::Instance { }, ); - let policy = value - .instance - .auto_restart - .policy - .unwrap_or(InstanceAutoRestart::DEFAULT_POLICY); - let enabled = match policy { + let policy = value.instance.auto_restart.policy; + // The active policy for this instance --- either its configured + // policy or the default. We report the configured policy as the + // instance's policy, but we must use this to determine whether it + // will be auto-restarted, since it may have no configured policy. + let active_policy = + policy.unwrap_or(InstanceAutoRestart::DEFAULT_POLICY); + + let enabled = match active_policy { InstanceAutoRestartPolicy::Never => false, InstanceAutoRestartPolicy::BestEffort => true, }; - external::InstanceAutoRestartStatus { enabled, cooldown_expiration } + external::InstanceAutoRestartStatus { + enabled, + policy: policy.map(Into::into), + cooldown_expiration, + } }; Self { diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index ca8c441a418..b4ad778735f 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -330,7 +330,9 @@ impl super::Nexus { None => None, }; - let update = InstanceUpdate { boot_disk_id }; + let auto_restart_policy = params.auto_restart_policy.map(Into::into); + + let update = InstanceUpdate { boot_disk_id, auto_restart_policy }; self.datastore() .instance_reconfigure(opctx, &authz_instance, update) .await diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index ff4c5a87123..1aac5a186cf 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -434,7 +434,10 @@ pub static DEMO_INSTANCE_CREATE: Lazy = auto_restart_policy: Default::default(), }); pub static DEMO_INSTANCE_UPDATE: Lazy = - Lazy::new(|| params::InstanceUpdate { boot_disk: None }); + Lazy::new(|| params::InstanceUpdate { + boot_disk: None, + auto_restart_policy: None, + }); // The instance needs a network interface, too. pub static DEMO_INSTANCE_NIC_NAME: Lazy = diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 6a94a189bd3..78c33c35621 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -4133,7 +4133,10 @@ async fn test_cannot_detach_boot_disk(cptestctx: &ControlPlaneTestContext) { let builder = RequestBuilder::new(client, http::Method::PUT, &url_instance_update) - .body(Some(¶ms::InstanceUpdate { boot_disk: None })) + .body(Some(¶ms::InstanceUpdate { + boot_disk: None, + auto_restart_policy: None, + })) .expect_status(Some(http::StatusCode::OK)); let response = NexusRequest::new(builder) .authn_as(AuthnMode::PrivilegedUser) @@ -4210,7 +4213,10 @@ async fn test_updating_running_instance_is_conflict( let builder = RequestBuilder::new(client, http::Method::PUT, &url_instance_update) - .body(Some(¶ms::InstanceUpdate { boot_disk: None })) + .body(Some(¶ms::InstanceUpdate { + boot_disk: None, + auto_restart_policy: None, + })) .expect_status(Some(http::StatusCode::CONFLICT)); let response = NexusRequest::new(builder) @@ -4236,7 +4242,10 @@ async fn test_updating_missing_instance_is_not_found( let builder = RequestBuilder::new(client, http::Method::PUT, &url_instance_update) - .body(Some(¶ms::InstanceUpdate { boot_disk: None })) + .body(Some(¶ms::InstanceUpdate { + boot_disk: None, + auto_restart_policy: None, + })) .expect_status(Some(http::StatusCode::NOT_FOUND)); let response = NexusRequest::new(builder) @@ -4325,6 +4334,7 @@ async fn test_boot_disk_can_be_changed(cptestctx: &ControlPlaneTestContext) { RequestBuilder::new(client, http::Method::PUT, &url_instance_update) .body(Some(¶ms::InstanceUpdate { boot_disk: Some(disks[1].identity.id.into()), + auto_restart_policy: None, })) .expect_status(Some(http::StatusCode::OK)); @@ -4396,6 +4406,7 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { RequestBuilder::new(client, http::Method::PUT, &url_instance_update) .body(Some(¶ms::InstanceUpdate { boot_disk: Some(disks[0].identity.id.into()), + auto_restart_policy: None, })) .expect_status(Some(http::StatusCode::CONFLICT)); let response = NexusRequest::new(builder) @@ -4431,6 +4442,7 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { RequestBuilder::new(client, http::Method::PUT, &url_instance_update) .body(Some(¶ms::InstanceUpdate { boot_disk: Some(disks[0].identity.id.into()), + auto_restart_policy: None, })) .expect_status(Some(http::StatusCode::OK)); let response = NexusRequest::new(builder) diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 9e05ff72f9c..abbfeb350db 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -3305,6 +3305,15 @@ "description": "`true` if this instance's auto-restart policy will permit the control plane to automatically restart it if it enters the `Failed` state.", "type": "boolean" }, + "auto_restart_policy": { + "nullable": true, + "description": "The auto-restart policy configured for this instance, or `None` if no explicit policy is configured.\n\nIf this is not present, then this instance uses the default auto-restart policy, which may or may not allow it to be restarted. The `auto_restart_enabled` field indicates whether the instance will be automatically restarted.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceAutoRestartPolicy" + } + ] + }, "boot_disk_id": { "nullable": true, "description": "the ID of the disk used to boot this Instance, if a specific one is assigned.", @@ -3392,6 +3401,25 @@ "time_run_state_updated" ] }, + "InstanceAutoRestartPolicy": { + "description": "A policy determining when an instance should be automatically restarted by the control plane.", + "oneOf": [ + { + "description": "The instance should not be automatically restarted by the control plane if it fails.", + "type": "string", + "enum": [ + "never" + ] + }, + { + "description": "If this instance is running and unexpectedly fails (e.g. due to a host software crash or unexpected host reboot), the control plane will make a best-effort attempt to restart it. The control plane may choose not to restart the instance to preserve the overall availability of the system.", + "type": "string", + "enum": [ + "best_effort" + ] + } + ] + }, "InstanceCpuCount": { "description": "The number of CPUs in an Instance", "type": "integer", diff --git a/openapi/nexus.json b/openapi/nexus.json index 74333bef82b..92e28b3cbdd 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -15156,6 +15156,15 @@ "description": "`true` if this instance's auto-restart policy will permit the control plane to automatically restart it if it enters the `Failed` state.", "type": "boolean" }, + "auto_restart_policy": { + "nullable": true, + "description": "The auto-restart policy configured for this instance, or `None` if no explicit policy is configured.\n\nIf this is not present, then this instance uses the default auto-restart policy, which may or may not allow it to be restarted. The `auto_restart_enabled` field indicates whether the instance will be automatically restarted.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceAutoRestartPolicy" + } + ] + }, "boot_disk_id": { "nullable": true, "description": "the ID of the disk used to boot this Instance, if a specific one is assigned.", @@ -15792,6 +15801,15 @@ "description": "Parameters of an `Instance` that can be reconfigured after creation.", "type": "object", "properties": { + "auto_restart_policy": { + "nullable": true, + "description": "The auto-restart policy for this instance.\n\nIf not provided, unset the instance's auto-restart policy.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceAutoRestartPolicy" + } + ] + }, "boot_disk": { "nullable": true, "description": "Name or ID of the disk the instance should be instructed to boot from.\n\nIf not provided, unset the instance's boot disk.",