Wait for Crucible agent to do requested work
Several endpoints of the Crucible agent do not perform work synchronously
within the endpoint's handler: a change is instead requested, and the
actual work proceeds asynchronously. Nexus, however, assumes that the
work completes synchronously.

This commit creates three functions in common_storage that properly deal
with an agent that does its work asynchronously (`delete_crucible_region`,
`delete_crucible_snapshot`, and `delete_crucible_running_snapshot`), and
calls those functions instead.
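
For illustration, a minimal sketch of the polling pattern such a helper has
to follow, assuming a hypothetical client trait and region states (these
stand-ins are not the actual common_storage or Crucible agent API):

```rust
// Sketch only: request the delete, then poll until the agent reports the
// asynchronous work has actually finished. `RegionClient` and `RegionState`
// are illustrative stand-ins.
use std::time::Duration;

#[allow(dead_code)]
enum RegionState {
    Requested,
    Created,
    Tombstoned, // delete requested, work still in progress
    Destroyed,  // delete finished
}

#[async_trait::async_trait]
trait RegionClient {
    async fn region_delete(&self, id: &str) -> anyhow::Result<()>;
    async fn region_get(&self, id: &str) -> anyhow::Result<Option<RegionState>>;
}

async fn delete_crucible_region(
    client: &dyn RegionClient,
    region_id: &str,
) -> anyhow::Result<()> {
    // Requesting the delete is idempotent, so it is safe to repeat.
    client.region_delete(region_id).await?;

    // Wait for the agent to actually finish the requested work.
    loop {
        match client.region_get(region_id).await? {
            None | Some(RegionState::Destroyed) => return Ok(()),
            Some(_) => tokio::time::sleep(Duration::from_secs(1)).await,
        }
    }
}
```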

Part of testing this commit was creating "disk" antagonists in the
omicron-stress tool. This uncovered cases where the transactions related
to disk creation and deletion failed to complete. One of these cases is
`regions_hard_delete`: that transaction is now retried until it succeeds,
using `TransactionError::retry_transaction` to check whether CRDB is
telling Nexus to retry the transaction. Another is
`decrease_crucible_resource_count_and_soft_delete_volume`: it was broken
into two steps, the first of which enumerates the read-only resources to
delete (because those do not currently change!).
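
The retry check itself boils down to recognizing CockroachDB's
retryable-conflict signal; a rough, illustrative version (the real logic
lives in `TransactionError::retry_transaction` and may also inspect the
error details) looks like this, assuming Diesel as the database layer:

```rust
// Illustrative stand-in: CockroachDB reports a retryable conflict as a
// serialization failure (SQLSTATE 40001, "restart transaction"), which
// Diesel surfaces as DatabaseErrorKind::SerializationFailure.
use diesel::result::{DatabaseErrorKind, Error as DieselError};

fn is_retryable(e: &DieselError) -> bool {
    matches!(
        e,
        DieselError::DatabaseError(DatabaseErrorKind::SerializationFailure, _)
    )
}
```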

Another fix in this commit, exposed by the disk antagonist: deleting a
disk in state `Creating` should only be allowed from within the disk
create saga itself. Otherwise, deleting a disk that is still in the disk
create saga can cause that saga's unwind to fail, leaving it stuck.
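
Concretely, `project_delete_disk_no_auth` now takes the set of states it
is OK to delete from as a parameter (see the disk.rs hunk below), so the
two call sites can differ. The exact sets each caller passes are an
assumption here, sketched for illustration:

```rust
// Illustrative only: which DiskState values each caller would consider
// deletable after this change.
use omicron_common::api::external::DiskState;

/// States a user-initiated disk delete may delete from.
fn external_delete_ok_states() -> Vec<DiskState> {
    vec![DiskState::Detached, DiskState::Faulted]
}

/// States the disk create saga's unwind may delete from: the saga owns the
/// disk it is creating, so `Creating` is additionally allowed here.
fn create_saga_unwind_ok_states() -> Vec<DiskState> {
    vec![DiskState::Detached, DiskState::Faulted, DiskState::Creating]
}
```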

This commit also bundles idempotency-related fixes for the simulated
Crucible agent, as these issues were exposed by the retries performed by
the new `delete_crucible_*` functions.
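
The shape of those fixes is that repeated requests must converge on the
same result; a sketch of the idea, using stand-in types rather than the
simulated agent's actual data model:

```rust
// Sketch only: a repeated delete of a region that is already tombstoned or
// gone should succeed rather than return an error, so callers can retry.
use std::collections::HashMap;

#[allow(dead_code)]
#[derive(Clone, Copy)]
enum State {
    Created,
    Tombstoned, // delete requested, not yet finished
    Destroyed,  // delete finished
}

#[allow(dead_code)]
struct SimulatedAgent {
    regions: HashMap<String, State>,
}

impl SimulatedAgent {
    fn delete_region(&mut self, id: &str) -> Result<(), String> {
        match self.regions.get(id).copied() {
            // First delete request: mark the region for destruction.
            Some(State::Created) => {
                self.regions.insert(id.to_string(), State::Tombstoned);
                Ok(())
            }
            // Repeat requests, or requests for a region that is already
            // gone, are no-ops rather than errors.
            Some(State::Tombstoned) | Some(State::Destroyed) | None => Ok(()),
        }
    }
}
```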

What shook out of this is that `Nexus::volume_delete` was removed: it was
not correct to call during the unwind of the disk and snapshot create
sagas, because it would conflict with what those sagas were already doing
as part of their unwind. The volume delete saga is still required during
disk and snapshot deletion, but the disk and snapshot create sagas have
enough context to properly unwind a volume themselves, even in the case
of read-only parents. Because `Nexus::volume_delete` is ultimately not
safe to call, any future saga should either embed the volume delete saga
as a sub-saga, or carry enough context in the outputs of its nodes to
properly unwind what was created.

Depends on oxidecomputer/crucible#838, which
fixes a few non-idempotency bugs in the actual Crucible agent.

Fixes oxidecomputer#3698
jmpesp committed Jul 21, 2023
1 parent 09e9d8d commit 1b364df
Showing 16 changed files with 933 additions and 395 deletions.
12 changes: 1 addition & 11 deletions nexus/db-model/src/volume.rs
@@ -7,19 +7,9 @@ use crate::collection::DatastoreCollectionConfig;
use crate::schema::{region, volume};
use chrono::{DateTime, Utc};
use db_macros::Asset;
use serde::{Deserialize, Serialize};
use uuid::Uuid;

#[derive(
Asset,
Queryable,
Insertable,
Debug,
Selectable,
Serialize,
Deserialize,
Clone,
)]
#[derive(Asset, Queryable, Insertable, Debug, Selectable, Clone)]
#[diesel(table_name = volume)]
pub struct Volume {
#[diesel(embed)]
7 changes: 1 addition & 6 deletions nexus/db-queries/src/db/datastore/disk.rs
@@ -568,17 +568,12 @@ impl DataStore {
pub async fn project_delete_disk_no_auth(
&self,
disk_id: &Uuid,
ok_to_delete_states: &[api::external::DiskState],
) -> Result<db::model::Disk, Error> {
use db::schema::disk::dsl;
let pool = self.pool();
let now = Utc::now();

let ok_to_delete_states = vec![
api::external::DiskState::Detached,
api::external::DiskState::Faulted,
api::external::DiskState::Creating,
];

let ok_to_delete_state_labels: Vec<_> =
ok_to_delete_states.iter().map(|s| s.label()).collect();
let destroyed = api::external::DiskState::Destroyed.label();
1 change: 1 addition & 0 deletions nexus/db-queries/src/db/datastore/mod.rs
@@ -92,6 +92,7 @@ pub use silo::Discoverability;
pub use switch_port::SwitchPortSettingsCombinedResult;
pub use virtual_provisioning_collection::StorageType;
pub use volume::CrucibleResources;
pub use volume::CrucibleTargets;

// Number of unique datasets required to back a region.
// TODO: This should likely turn into a configuration option.
153 changes: 92 additions & 61 deletions nexus/db-queries/src/db/datastore/region.rs
@@ -22,6 +22,8 @@ use nexus_types::external_api::params;
use omicron_common::api::external;
use omicron_common::api::external::DeleteResult;
use omicron_common::api::external::Error;
use omicron_common::backoff::{self, BackoffError};
use slog::Logger;
use uuid::Uuid;

impl DataStore {
@@ -146,6 +148,7 @@ impl DataStore {
/// Also updates the storage usage on their corresponding datasets.
pub async fn regions_hard_delete(
&self,
log: &Logger,
region_ids: Vec<Uuid>,
) -> DeleteResult {
if region_ids.is_empty() {
@@ -159,67 +162,95 @@
}
type TxnError = TransactionError<RegionDeleteError>;

self.pool()
.transaction(move |conn| {
use db::schema::dataset::dsl as dataset_dsl;
use db::schema::region::dsl as region_dsl;

// Remove the regions, collecting datasets they're from.
let datasets = diesel::delete(region_dsl::region)
.filter(region_dsl::id.eq_any(region_ids))
.returning(region_dsl::dataset_id)
.get_results::<Uuid>(conn)?;

// Update datasets to which the regions belonged.
for dataset in datasets {
let dataset_total_occupied_size: Option<
diesel::pg::data_types::PgNumeric,
> = region_dsl::region
.filter(region_dsl::dataset_id.eq(dataset))
.select(diesel::dsl::sum(
region_dsl::block_size
* region_dsl::blocks_per_extent
* region_dsl::extent_count,
))
.nullable()
.get_result(conn)?;

let dataset_total_occupied_size: i64 = if let Some(
dataset_total_occupied_size,
) =
dataset_total_occupied_size
{
let dataset_total_occupied_size: db::model::ByteCount =
dataset_total_occupied_size.try_into().map_err(
|e: anyhow::Error| {
TxnError::CustomError(
RegionDeleteError::NumericError(
e.to_string(),
),
)
},
)?;

dataset_total_occupied_size.into()
} else {
0
};

diesel::update(dataset_dsl::dataset)
.filter(dataset_dsl::id.eq(dataset))
.set(
dataset_dsl::size_used
.eq(dataset_total_occupied_size),
)
.execute(conn)?;
}

Ok(())
})
.await
.map_err(|e: TxnError| {
Error::internal_error(&format!("Transaction error: {}", e))
})
// Retry this transaction until it succeeds. It's a little heavy in that
// there's a for loop inside that iterates over the datasets the
// argument regions belong to, and it often encounters the "retry
// transaction" error.
let transaction = {
|region_ids: Vec<Uuid>| async {
self.pool()
.transaction(move |conn| {
use db::schema::dataset::dsl as dataset_dsl;
use db::schema::region::dsl as region_dsl;

// Remove the regions, collecting datasets they're from.
let datasets = diesel::delete(region_dsl::region)
.filter(region_dsl::id.eq_any(region_ids))
.returning(region_dsl::dataset_id)
.get_results::<Uuid>(conn)?;

// Update datasets to which the regions belonged.
for dataset in datasets {
let dataset_total_occupied_size: Option<
diesel::pg::data_types::PgNumeric,
> = region_dsl::region
.filter(region_dsl::dataset_id.eq(dataset))
.select(diesel::dsl::sum(
region_dsl::block_size
* region_dsl::blocks_per_extent
* region_dsl::extent_count,
))
.nullable()
.get_result(conn)?;

let dataset_total_occupied_size: i64 = if let Some(
dataset_total_occupied_size,
) =
dataset_total_occupied_size
{
let dataset_total_occupied_size: db::model::ByteCount =
dataset_total_occupied_size.try_into().map_err(
|e: anyhow::Error| {
TxnError::CustomError(
RegionDeleteError::NumericError(
e.to_string(),
),
)
},
)?;

dataset_total_occupied_size.into()
} else {
0
};

diesel::update(dataset_dsl::dataset)
.filter(dataset_dsl::id.eq(dataset))
.set(
dataset_dsl::size_used
.eq(dataset_total_occupied_size),
)
.execute(conn)?;
}

Ok(())
})
.await
.map_err(|e: TxnError| {
if e.retry_transaction() {
BackoffError::transient(Error::internal_error(
&format!("Retryable transaction error {:?}", e)
))
} else {
BackoffError::Permanent(Error::internal_error(
&format!("Transaction error: {}", e)
))
}
})
}
};

backoff::retry_notify(
backoff::retry_policy_internal_service_aggressive(),
|| async {
let region_ids = region_ids.clone();
transaction(region_ids).await
},
|e: Error, delay| {
info!(log, "{:?}, trying again in {:?}", e, delay,);
},
)
.await
}

/// Return the total occupied size for a dataset