Skip to content

Commit

Permalink
make "sled failures only" the default policy
Browse files Browse the repository at this point in the history
  • Loading branch information
hawkw committed Sep 2, 2024
1 parent 9ca4c5e commit 2db6eff
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 8 deletions.
2 changes: 1 addition & 1 deletion nexus/db-model/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ impl Instance {
ncpus: params.ncpus.into(),
memory: params.memory.into(),
hostname: params.hostname.to_string(),
auto_restart_policy: InstanceAutoRestart::Never,
auto_restart_policy: InstanceAutoRestart::default(),
runtime_state,

updater_gen: Generation::new(),
Expand Down
12 changes: 10 additions & 2 deletions nexus/db-model/src/instance_auto_restart.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,26 @@ impl_enum_type!(

// Enum values
Never => b"never"
SledFailuresOnly => b"sled_failures_only"
AllFailures => b"all_failures"
);

impl InstanceAutoRestart {
/// Returns the static, snake_case name of this auto-restart policy.
///
/// The `fmt::Display` implementation for this type writes this same string.
pub fn label(&self) -> &'static str {
match self {
InstanceAutoRestart::Never => "never",
InstanceAutoRestart::AllFailures => "all_failures",
Self::Never => "never",
Self::SledFailuresOnly => "sled_failures_only",
Self::AllFailures => "all_failures",
}
}
}

impl Default for InstanceAutoRestart {
fn default() -> Self {
Self::SledFailuresOnly
}
}

impl fmt::Display for InstanceAutoRestart {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.label())
Expand Down
7 changes: 7 additions & 0 deletions schema/crdb/dbinit.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,13 @@ CREATE TYPE IF NOT EXISTS omicron.public.instance_auto_restart AS ENUM (
* rebooted by the control plane.
*/
'never',
/*
* The instance should be automatically restarted if, and only if, the sled
* it was running on has restarted or become unavailable. If the individual
* Propolis VMM process for this instance crashes, it should *not* be
* restarted automatically.
*/
'sled_failures_only',
/*
* The instance should be automatically restarted any time a fault is
* detected
Expand Down
8 changes: 4 additions & 4 deletions schema/crdb/turn-boot-on-fault-into-auto-restart/README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
This migration replaces the `omicron.public.instance.boot_on_fault` column,
which is a `bool`, with a new `auto_restart_policy` column, which is an enum
(`omicron.public.instance_auto_restart`). The new enum type will allow
auto-restart policies other than "always" and "never" to be added in the future.
auto-restart policies other than "always" and "never".
Existing instance records are backfilled with the `all_failures` variant of
`instance_auto_restart` if `boot_on_fault` is `true`, or `never` if
`instance_auto_restart` if `boot_on_fault` is `true`, or `sled_failures_only` if
`boot_on_fault` is `false`.

The migration performs the following operations:
Expand All @@ -14,8 +14,8 @@ The migration performs the following operations:
2. `up02.sql` adds a (nullable) `auto_restart_policy` column to the `instance`
table.
3. `up03.sql` updates instance records by setting `auto_restart_policy` to
`all_failures` if `boot_on_fault` is `true`, or `never` if `boot_on_fault` is
`false`.
`all_failures` if `boot_on_fault` is `true`, or `sled_failures_only` if
`boot_on_fault` is `false`.
4. Now that all instance records have a value for `auto_restart_policy`,
`up04.sql` makes the `auto_restart_policy` column non-null.
5. Finally, `up05.sql` drops the now-defunct `boot_on_fault` column.
7 changes: 7 additions & 0 deletions schema/crdb/turn-boot-on-fault-into-auto-restart/up01.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ CREATE TYPE IF NOT EXISTS omicron.public.instance_auto_restart AS ENUM (
* rebooted by the control plane.
*/
'never',
/*
* The instance should be automatically restarted if, and only if, the sled
* it was running on has restarted or become unavailable. If the individual
* Propolis VMM process for this instance crashes, it should *not* be
* restarted automatically.
*/
'sled_failures_only',
/*
* The instance should be automatically restarted any time a fault is
* detected
Expand Down
2 changes: 1 addition & 1 deletion schema/crdb/turn-boot-on-fault-into-auto-restart/up03.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SET LOCAL disallow_full_table_scans = off;
UPDATE omicron.public.instance SET auto_restart_policy = CASE
WHEN boot_on_fault = true THEN 'all_failures'
ELSE 'never'
ELSE 'sled_failures_only'
END;

0 comments on commit 2db6eff

Please sign in to comment.