Skip to content

Commit

Permalink
Stop deleting failed clusters, taint instead
Browse files Browse the repository at this point in the history
  • Loading branch information
rileykarson committed Sep 16, 2023
1 parent 1a32f8d commit 522562e
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 70 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2324,12 +2324,11 @@ func resourceContainerClusterCreate(d *schema.ResourceData, meta interface{}) er
}

// For now PSC based cluster don't support `enable_private_endpoint` on `create`, but only on `update` API call.
// If cluster is PSC based and enable_private_endpoint is set to true we will ignore it on `create` call and update cluster right after creation.
// If cluster is PSC based and enable_private_endpoint is set to true we will ignore it on `create` call and update cluster right after creation.
enablePrivateEndpointPSCCluster := isEnablePrivateEndpointPSCCluster(cluster)
if enablePrivateEndpointPSCCluster {
cluster.PrivateClusterConfig.EnablePrivateEndpoint = false
}


req := &container.CreateClusterRequest{
Cluster: cluster,
Expand Down Expand Up @@ -2369,29 +2368,26 @@ func resourceContainerClusterCreate(d *schema.ResourceData, meta interface{}) er
if err := d.Set("operation", op.Name); err != nil {
return fmt.Errorf("Error setting operation: %s", err)
}

return nil
default:
// leaving default case to ensure this is non blocking
}

// Try a GET on the cluster so we can see the state in debug logs. This will help classify error states.
clusterGetCall := config.NewContainerClient(userAgent).Projects.Locations.Clusters.Get(containerClusterFullName(project, location, clusterName))
if config.UserProjectOverride {
clusterGetCall.Header().Add("X-Goog-User-Project", project)
}

_, getErr := clusterGetCall.Do()
if getErr != nil {
log.Printf("[WARN] Cluster %s was created in an error state and not found", clusterName)
d.SetId("")
}

if deleteErr := cleanFailedContainerCluster(d, meta); deleteErr != nil {
log.Printf("[WARN] Unable to clean up cluster from failed creation: %s", deleteErr)
// Leave ID set as the cluster likely still exists and should not be removed from state yet.
} else {
log.Printf("[WARN] Verified failed creation of cluster %s was cleaned up", d.Id())
d.SetId("")
}
// The resource didn't actually create
// Don't clear cluster id, this will taint the resource
log.Printf("[WARN] GKE cluster %s was created in an error state, and has been marked as tainted", clusterName)
return waitErr
}

Expand Down Expand Up @@ -2536,14 +2532,8 @@ func resourceContainerClusterRead(d *schema.ResourceData, meta interface{}) erro
d.SetId("")
}

if deleteErr := cleanFailedContainerCluster(d, meta); deleteErr != nil {
log.Printf("[WARN] Unable to clean up cluster from failed creation: %s", deleteErr)
// Leave ID set as the cluster likely still exists and should not be removed from state yet.
} else {
log.Printf("[WARN] Verified failed creation of cluster %s was cleaned up", d.Id())
d.SetId("")
}
// The resource didn't actually create
// Don't clear cluster id, this will taint the resource
log.Printf("[WARN] GKE cluster %s was created in an error state, and has been marked as tainted", clusterName)
return waitErr
}
}
Expand Down Expand Up @@ -4153,52 +4143,6 @@ func resourceContainerClusterDelete(d *schema.ResourceData, meta interface{}) er
return nil
}

// cleanFailedContainerCluster deletes clusters that failed but were
// created in an error state. Similar to resourceContainerClusterDelete
// but implemented in separate function as it doesn't try to lock already
// locked cluster state, does different error handling, and doesn't do retries.
func cleanFailedContainerCluster(d *schema.ResourceData, meta interface{}) error {
config := meta.(*transport_tpg.Config)

project, err := tpgresource.GetProject(d, config)
if err != nil {
return err
}

location, err := tpgresource.GetLocation(d, config)
if err != nil {
return err
}

userAgent, err := tpgresource.GenerateUserAgentString(d, config.UserAgent)
if err != nil {
return err
}

clusterName := d.Get("name").(string)
fullName := containerClusterFullName(project, location, clusterName)

log.Printf("[DEBUG] Cleaning up failed GKE cluster %s", d.Get("name").(string))
clusterDeleteCall := config.NewContainerClient(userAgent).Projects.Locations.Clusters.Delete(fullName)
if config.UserProjectOverride {
clusterDeleteCall.Header().Add("X-Goog-User-Project", project)
}
op, err := clusterDeleteCall.Do()
if err != nil {
return transport_tpg.HandleNotFoundError(err, d, fmt.Sprintf("Container Cluster %q", d.Get("name").(string)))
}

// Wait until it's deleted
waitErr := ContainerOperationWait(config, op, project, location, "deleting GKE cluster", userAgent, d.Timeout(schema.TimeoutDelete))
if waitErr != nil {
return waitErr
}

log.Printf("[INFO] GKE cluster %s has been deleted", d.Id())
d.SetId("")
return nil
}

var containerClusterRestingStates = RestingStates{
"RUNNING": ReadyState,
"DEGRADED": ErrorState,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3356,6 +3356,9 @@ func TestAccContainerCluster_autoprovisioningDefaultsManagement(t *testing.T) {
})
}

// This resource originally cleaned up the dangling cluster directly, but now
// taints it, having Terraform clean it up during the next apply. This test
// name is now inexact, but is being preserved to maintain the test history.
func TestAccContainerCluster_errorCleanDanglingCluster(t *testing.T) {
t.Parallel()

Expand All @@ -3376,18 +3379,18 @@ func TestAccContainerCluster_errorCleanDanglingCluster(t *testing.T) {
Config: initConfig,
},
{
ResourceName: "google_container_cluster.cidr_error_preempt",
ImportState: true,
ResourceName: "google_container_cluster.cidr_error_preempt",
ImportState: true,
ImportStateVerify: true,
},
{
Config: overlapConfig,
Config: overlapConfig,
ExpectError: regexp.MustCompile("Error waiting for creating GKE cluster"),
},
// If dangling cluster wasn't deleted, this plan will return an error
// If tainted cluster won't be deleted, this step will return an error
{
Config: overlapConfig,
PlanOnly: true,
Config: overlapConfig,
PlanOnly: true,
ExpectNonEmptyPlan: true,
},
},
Expand Down

0 comments on commit 522562e

Please sign in to comment.