Fix for deleted volumes during region replacement (#6659)
Volumes can be deleted at any time, but the tasks and sagas that perform
region replacement did not account for this. This commit adds checks in
a few places for whether a volume is soft-deleted or hard-deleted, and
bails out of any affected region replacement accordingly:

- If the replacement request is in the Requested state and the volume
was seen to be soft-deleted or hard-deleted in the "region replacement"
background task, then transition the region replacement request to
Complete

- If the replacement request is in the Running state, and the volume was
seen to be soft-deleted or hard-deleted in the region replacement drive
saga, then skip any operations on that volume in that saga and allow
that saga to transition the region replacement request to
ReplacementDone. Later the rest of the region replacement machinery will
transition the request to Complete and clean up resources as
appropriate.

Testing this required fleshing out the simulated Crucible Pantry with
support for the new endpoints that the region replacement drive saga
queries. Full parity is left for future work; the additional endpoints it
would require were left in but commented out.

This commit was peeled off work in progress to address #6353.
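
As a condensed mental model of the two cases above, the following
self-contained sketch shows the intended state transitions. The
ReplacementState enum and next_state_for_deleted_volume function are
illustrative stand-ins invented for this example, not names from the Nexus
codebase, where the logic is split across the "region replacement"
background task and the drive saga shown in the diffs below.

#[derive(Debug, PartialEq)]
enum ReplacementState {
    Requested,
    Running,
    ReplacementDone,
    Complete,
}

/// Given a request's current state and whether its volume has been soft- or
/// hard-deleted, return the state the request should be moved to.
fn next_state_for_deleted_volume(
    current: ReplacementState,
    volume_deleted: bool,
) -> ReplacementState {
    if !volume_deleted {
        // Nothing to short-circuit; the normal replacement machinery runs.
        return current;
    }

    match current {
        // A Requested request has no associated work yet, so it can move
        // straight to Complete.
        ReplacementState::Requested => ReplacementState::Complete,

        // The drive saga skips volume operations and moves a Running request
        // to ReplacementDone; the rest of the machinery later transitions it
        // to Complete and cleans up.
        ReplacementState::Running => ReplacementState::ReplacementDone,

        // Other states are handled by the existing replacement machinery.
        other => other,
    }
}

fn main() {
    assert_eq!(
        next_state_for_deleted_volume(ReplacementState::Requested, true),
        ReplacementState::Complete
    );
    assert_eq!(
        next_state_for_deleted_volume(ReplacementState::Running, true),
        ReplacementState::ReplacementDone
    );
}

In the actual changes, the Requested case is handled by the new
set_region_replacement_complete_from_requested datastore method called from
the background task, and the Running case by the early DriveCheck::Done
return in srrd_drive_region_replacement_check.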
jmpesp authored Oct 2, 2024
1 parent 6d65e1e commit 1b82134
Showing 20 changed files with 1,282 additions and 102 deletions.
8 changes: 8 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
@@ -1129,6 +1129,14 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
println!(" > {line}");
}

println!(
" region replacement requests set to completed ok: {}",
status.requests_completed_ok.len()
);
for line in &status.requests_completed_ok {
println!(" > {line}");
}

println!(" errors: {}", status.errors.len());
for line in &status.errors {
println!(" > {line}");
116 changes: 59 additions & 57 deletions dev-tools/omdb/tests/successes.out

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions nexus/db-model/src/region_replacement.rs
@@ -116,6 +116,13 @@ impl std::str::FromStr for RegionReplacementState {
/// "finish" notification is seen by the region replacement drive background
/// task. This check is done before invoking the region replacement drive saga.
///
/// If the volume whose region is being replaced is soft-deleted or
/// hard-deleted, then the replacement request will be transitioned along the
/// states to Complete while avoiding operations that are meant to operate on
/// that volume. If the volume is soft-deleted or hard-deleted while the
/// replacement request is in the "Requested" state, the replacement request
/// will transition straight to Complete, and no operations will be performed.
///
/// See also: RegionReplacementStep records
#[derive(
Queryable,
71 changes: 70 additions & 1 deletion nexus/db-queries/src/db/datastore/region_replacement.rs
@@ -657,7 +657,7 @@ impl DataStore {

/// Transition a RegionReplacement record from Completing to Complete,
/// clearing the operating saga id. Also removes the `volume_repair` record
/// that is taking a "lock" on the Volume.
/// that is taking a lock on the Volume.
pub async fn set_region_replacement_complete(
&self,
opctx: &OpContext,
@@ -723,6 +723,75 @@
})
}

/// Transition a RegionReplacement record from Requested to Complete, which
/// occurs when the associated volume is soft or hard deleted. Also removes
/// the `volume_repair` record that is taking a lock on the Volume.
pub async fn set_region_replacement_complete_from_requested(
&self,
opctx: &OpContext,
request: RegionReplacement,
) -> Result<(), Error> {
type TxnError = TransactionError<Error>;

assert_eq!(
request.replacement_state,
RegionReplacementState::Requested,
);

self.pool_connection_authorized(opctx)
.await?
.transaction_async(|conn| async move {
Self::volume_repair_delete_query(
request.volume_id,
request.id,
)
.execute_async(&conn)
.await?;

use db::schema::region_replacement::dsl;

let result = diesel::update(dsl::region_replacement)
.filter(dsl::id.eq(request.id))
.filter(
dsl::replacement_state.eq(RegionReplacementState::Requested),
)
.filter(dsl::operating_saga_id.is_null())
.set((
dsl::replacement_state.eq(RegionReplacementState::Complete),
))
.check_if_exists::<RegionReplacement>(request.id)
.execute_and_check(&conn)
.await?;

match result.status {
UpdateStatus::Updated => Ok(()),

UpdateStatus::NotUpdatedButExists => {
let record = result.found;

if record.replacement_state == RegionReplacementState::Complete {
Ok(())
} else {
Err(TxnError::CustomError(Error::conflict(format!(
"region replacement {} set to {:?} (operating saga id {:?})",
request.id,
record.replacement_state,
record.operating_saga_id,
))))
}
}
}
})
.await
.map_err(|e| match e {
TxnError::CustomError(error) => error,

TxnError::Database(error) => {
public_error_from_diesel(error, ErrorHandler::Server)
}
})
}

/// Nexus has been notified by an Upstairs (or has otherwise determined)
/// that a region replacement is done, so update the record. Filter on the
/// following:
32 changes: 27 additions & 5 deletions nexus/db-queries/src/db/datastore/volume.rs
@@ -1572,6 +1572,14 @@ impl DataStore {
.optional()
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
}

/// Return true if a volume was soft-deleted or hard-deleted
pub async fn volume_deleted(&self, volume_id: Uuid) -> Result<bool, Error> {
match self.volume_get(volume_id).await? {
Some(v) => Ok(v.time_deleted.is_some()),
None => Ok(true),
}
}
}

#[derive(Default, Clone, Debug, Serialize, Deserialize)]
@@ -2049,13 +2057,20 @@ impl DataStore {
let old_volume = if let Some(old_volume) = maybe_old_volume {
old_volume
} else {
// Existing volume was deleted, so return here. We can't
// perform the region replacement now, and this will
// short-circuit the rest of the process.
// Existing volume was hard-deleted, so return here. We
// can't perform the region replacement now, and this
// will short-circuit the rest of the process.

return Ok(VolumeReplaceResult::ExistingVolumeDeleted);
};

if old_volume.time_deleted.is_some() {
// Existing volume was soft-deleted, so return here for
// the same reason: the region replacement process
// should be short-circuited now.
return Ok(VolumeReplaceResult::ExistingVolumeDeleted);
}

let old_vcr: VolumeConstructionRequest =
match serde_json::from_str(&old_volume.data()) {
Ok(vcr) => vcr,
@@ -2260,13 +2275,20 @@ impl DataStore {
let old_volume = if let Some(old_volume) = maybe_old_volume {
old_volume
} else {
// Existing volume was deleted, so return here. We can't
// perform the region snapshot replacement now, and this
// Existing volume was hard-deleted, so return here. We
// can't perform the region replacement now, and this
// will short-circuit the rest of the process.

return Ok(VolumeReplaceResult::ExistingVolumeDeleted);
};

if old_volume.time_deleted.is_some() {
// Existing volume was soft-deleted, so return here for
// the same reason: the region replacement process
// should be short-circuited now.
return Ok(VolumeReplaceResult::ExistingVolumeDeleted);
}

let old_vcr: VolumeConstructionRequest =
match serde_json::from_str(&old_volume.data()) {
Ok(vcr) => vcr,
2 changes: 1 addition & 1 deletion nexus/examples/config-second.toml
@@ -129,7 +129,7 @@ blueprints.period_secs_collect_crdb_node_ids = 180
sync_service_zone_nat.period_secs = 30
switch_port_settings_manager.period_secs = 30
region_replacement.period_secs = 30
region_replacement_driver.period_secs = 10
region_replacement_driver.period_secs = 30
# How frequently to query the status of active instances.
instance_watcher.period_secs = 30
# How frequently to schedule new instance update sagas.
2 changes: 1 addition & 1 deletion nexus/examples/config.toml
@@ -115,7 +115,7 @@ blueprints.period_secs_collect_crdb_node_ids = 180
sync_service_zone_nat.period_secs = 30
switch_port_settings_manager.period_secs = 30
region_replacement.period_secs = 30
region_replacement_driver.period_secs = 10
region_replacement_driver.period_secs = 30
# How frequently to query the status of active instances.
instance_watcher.period_secs = 30
# How frequently to schedule new instance update sagas.
111 changes: 110 additions & 1 deletion nexus/src/app/background/tasks/region_replacement.rs
@@ -170,8 +170,79 @@ impl BackgroundTask for RegionReplacementDetector {
};

for request in requests {
// If the replacement request is in the `requested` state and
// the request's volume was soft-deleted or hard-deleted, avoid
// sending the start request and instead transition the request
// to completed

let volume_deleted = match self
.datastore
.volume_deleted(request.volume_id)
.await
{
Ok(volume_deleted) => volume_deleted,

Err(e) => {
let s = format!(
"error checking if volume id {} was deleted: {e}",
request.volume_id,
);
error!(&log, "{s}");

status.errors.push(s);
continue;
}
};

let request_id = request.id;

if volume_deleted {
// Volume was soft or hard deleted, so proceed with clean
// up. Because this request is in state Requested there is
// no additional associated state, so transition the record
// straight to Complete.

info!(
&log,
"request {} volume {} was soft or hard deleted!",
request_id,
request.volume_id,
);

let result = self
.datastore
.set_region_replacement_complete_from_requested(
opctx, request,
)
.await;

match result {
Ok(()) => {
let s = format!(
"request {} transitioned from requested to \
complete",
request_id,
);

info!(&log, "{s}");
status.requests_completed_ok.push(s);
}

Err(e) => {
let s = format!(
"error transitioning {} from requested to \
complete: {e}",
request_id,
);

error!(&log, "{s}");
status.errors.push(s);
}
}

continue;
}

let result = self
.send_start_request(
authn::saga::Serialized::for_opctx(opctx),
@@ -213,7 +284,10 @@ mod test {
use super::*;
use crate::app::background::init::test::NoopStartSaga;
use nexus_db_model::RegionReplacement;
use nexus_db_model::Volume;
use nexus_test_utils_macros::nexus_test;
use sled_agent_client::types::CrucibleOpts;
use sled_agent_client::types::VolumeConstructionRequest;
use uuid::Uuid;

type ControlPlaneTestContext =
@@ -239,14 +313,49 @@
assert_eq!(result, json!(RegionReplacementStatus::default()));

// Add a region replacement request for a fake region
let request = RegionReplacement::new(Uuid::new_v4(), Uuid::new_v4());
let volume_id = Uuid::new_v4();
let request = RegionReplacement::new(Uuid::new_v4(), volume_id);
let request_id = request.id;

datastore
.insert_region_replacement_request(&opctx, request)
.await
.unwrap();

let volume_construction_request = VolumeConstructionRequest::Volume {
id: volume_id,
block_size: 0,
sub_volumes: vec![VolumeConstructionRequest::Region {
block_size: 0,
blocks_per_extent: 0,
extent_count: 0,
gen: 0,
opts: CrucibleOpts {
id: volume_id,
target: vec![
// if you put something here, you'll need a synthetic
// dataset record
],
lossy: false,
flush_timeout: None,
key: None,
cert_pem: None,
key_pem: None,
root_cert_pem: None,
control: None,
read_only: false,
},
}],
read_only_parent: None,
};

let volume_data =
serde_json::to_string(&volume_construction_request).unwrap();

let volume = Volume::new(volume_id, volume_data);

datastore.volume_create(volume).await.unwrap();

// Activate the task - it should pick that up and try to run the region
// replacement start saga
let result: RegionReplacementStatus =
26 changes: 26 additions & 0 deletions nexus/src/app/sagas/region_replacement_drive.rs
@@ -309,6 +309,32 @@ async fn srrd_drive_region_replacement_check(
&params.serialized_authn,
);

// It doesn't make sense to perform any of this saga if the volume was soft
// or hard deleted: for example, this happens if the higher level resource
// like the disk was deleted. Volume deletion potentially results in the
// clean-up of Crucible resources, so it wouldn't even be valid to attempt
// to drive forward any type of live repair or reconciliation.
//
// Setting Done here will cause this saga to transition the replacement
// request to ReplacementDone.

let volume_deleted = osagactx
.datastore()
.volume_deleted(params.request.volume_id)
.await
.map_err(ActionError::action_failed)?;

if volume_deleted {
info!(
log,
"volume was soft or hard deleted!";
"region replacement id" => %params.request.id,
"volume id" => %params.request.volume_id,
);

return Ok(DriveCheck::Done);
}

let last_request_step = osagactx
.datastore()
.current_region_replacement_request_step(&opctx, params.request.id)
4 changes: 2 additions & 2 deletions nexus/src/app/sagas/region_replacement_finish.rs
@@ -275,8 +275,8 @@ pub(crate) mod test {
opts: CrucibleOpts {
id: old_region_volume_id,
target: vec![
// XXX if you put something here, you'll need a
// synthetic dataset record
// if you put something here, you'll need a synthetic
// dataset record
],
lossy: false,
flush_timeout: None,
@@ -250,8 +250,8 @@ pub(crate) mod test {
opts: CrucibleOpts {
id: old_snapshot_volume_id,
target: vec![
// XXX if you put something here, you'll need a
// synthetic dataset record
// if you put something here, you'll need a synthetic
// dataset record
],
lossy: false,
flush_timeout: None,
