diff --git a/pkg/destroy/gcp/gcp.go b/pkg/destroy/gcp/gcp.go
index d2cd2a61400..9df3ab0a8fe 100644
--- a/pkg/destroy/gcp/gcp.go
+++ b/pkg/destroy/gcp/gcp.go
@@ -24,6 +24,7 @@ import (
 	gcpconfig "github.com/openshift/installer/pkg/asset/installconfig/gcp"
 	"github.com/openshift/installer/pkg/destroy/providers"
 	"github.com/openshift/installer/pkg/types"
+	gcptypes "github.com/openshift/installer/pkg/types/gcp"
 	"github.com/openshift/installer/pkg/version"
 )
@@ -44,6 +45,11 @@ type ClusterUninstaller struct {
 	dnsSvc     *dns.Service
 	storageSvc *storage.Service
 
+	// cloudControllerUID is the cluster ID used by the cluster's cloud controller
+	// to generate load balancer related resources. It can be obtained either
+	// from metadata or by inferring it from existing cluster resources.
+	cloudControllerUID string
+
 	requestIDTracker   pendingItemTracker
 	pendingItemTracker pendingItemTracker
 }
@@ -51,12 +57,12 @@ type ClusterUninstaller struct {
 // New returns a GCP destroyer from ClusterMetadata.
 func New(logger logrus.FieldLogger, metadata *types.ClusterMetadata) (providers.Destroyer, error) {
 	return &ClusterUninstaller{
-		Logger:    logger,
-		Region:    metadata.ClusterPlatformMetadata.GCP.Region,
-		ProjectID: metadata.ClusterPlatformMetadata.GCP.ProjectID,
-		ClusterID: metadata.InfraID,
-		Context:   context.Background(),
-
+		Logger:             logger,
+		Region:             metadata.ClusterPlatformMetadata.GCP.Region,
+		ProjectID:          metadata.ClusterPlatformMetadata.GCP.ProjectID,
+		ClusterID:          metadata.InfraID,
+		Context:            context.Background(),
+		cloudControllerUID: gcptypes.CloudControllerUID(metadata.InfraID),
 		requestIDTracker:   newRequestIDTracker(),
 		pendingItemTracker: newPendingItemTracker(),
 	}, nil
@@ -157,6 +163,14 @@ func isNoOp(err error) bool {
 	return ok && (ae.Code == http.StatusNotFound || ae.Code == http.StatusNotModified)
 }
 
+func isNotFound(err error) bool {
+	if err == nil {
+		return false
+	}
+	ae, ok := err.(*googleapi.Error)
+	return ok && ae.Code == http.StatusNotFound
+}
+
 // aggregateError is a utility function that takes a slice of errors and an
 // optional pending argument, and returns an error or nil
 func aggregateError(errs []error, pending ...int) error {
diff --git a/pkg/destroy/gcp/network.go b/pkg/destroy/gcp/network.go
index 0cf1abffc24..3ccd3fce860 100644
--- a/pkg/destroy/gcp/network.go
+++ b/pkg/destroy/gcp/network.go
@@ -39,7 +39,7 @@ func (o *ClusterUninstaller) listFirewalls() ([]string, error) {
 	return result, nil
 }
 
-func (o *ClusterUninstaller) deleteFirewall(name string) error {
+func (o *ClusterUninstaller) deleteFirewall(name string, errorOnPending bool) error {
 	o.Logger.Debugf("Deleting firewall rule %s", name)
 	ctx, cancel := o.contextWithTimeout()
 	defer cancel()
@@ -52,6 +52,9 @@ func (o *ClusterUninstaller) deleteFirewall(name string) error {
 		o.resetRequestID("firewall", name)
 		return errors.Errorf("failed to delete firewall %s with error: %s", name, op.HttpErrorMessage)
 	}
+	if errorOnPending && op != nil && op.Status != "DONE" {
+		return errors.Errorf("deletion of firewall %s is pending", name)
+	}
 	return nil
 }
 
@@ -66,7 +69,7 @@ func (o *ClusterUninstaller) destroyFirewalls() error {
 	errs := []error{}
 	for _, firewall := range firewalls {
 		found = append(found, firewall)
-		err := o.deleteFirewall(firewall)
+		err := o.deleteFirewall(firewall, false)
 		if err != nil {
 			errs = append(errs, err)
 		}
@@ -97,7 +100,7 @@ func (o *ClusterUninstaller) listAddresses() ([]string, error) {
 	return result, nil
 }
 
-func (o *ClusterUninstaller) deleteAddress(name string) error {
+func (o *ClusterUninstaller) deleteAddress(name string, errorOnPending bool) error {
 	o.Logger.Debugf("Deleting address %s", name)
 	ctx, cancel := o.contextWithTimeout()
 	defer cancel()
@@ -110,6 +113,9 @@ func (o *ClusterUninstaller) deleteAddress(name string) error {
 		o.resetRequestID("address", name)
 		return errors.Errorf("failed to delete address %s with error: %s", name, op.HttpErrorMessage)
 	}
+	if errorOnPending && op != nil && op.Status != "DONE" {
+		return errors.Errorf("deletion of address %s is pending", name)
+	}
 	return nil
 }
 
@@ -124,7 +130,7 @@ func (o *ClusterUninstaller) destroyAddresses() error {
 	errs := []error{}
 	for _, address := range addresses {
 		found = append(found, address)
-		err := o.deleteAddress(address)
+		err := o.deleteAddress(address, false)
 		if err != nil {
 			errs = append(errs, err)
 		}
@@ -155,7 +161,7 @@ func (o *ClusterUninstaller) listForwardingRules() ([]string, error) {
 	return result, nil
 }
 
-func (o *ClusterUninstaller) deleteForwardingRule(name string) error {
+func (o *ClusterUninstaller) deleteForwardingRule(name string, errorOnPending bool) error {
 	o.Logger.Debugf("Deleting forwarding rule %s", name)
 	ctx, cancel := o.contextWithTimeout()
 	defer cancel()
@@ -168,6 +174,9 @@ func (o *ClusterUninstaller) deleteForwardingRule(name string) error {
 		o.resetRequestID("forwardingrule", name)
 		return errors.Errorf("failed to delete forwarding rule %s with error: %s", name, op.HttpErrorMessage)
 	}
+	if errorOnPending && op != nil && op.Status != "DONE" {
+		return errors.Errorf("deletion of forwarding rule %s is pending", name)
+	}
 	return nil
 }
 
@@ -182,7 +191,7 @@ func (o *ClusterUninstaller) destroyForwardingRules() error {
 	errs := []error{}
 	for _, forwardingRule := range forwardingRules {
 		found = append(found, forwardingRule)
-		err := o.deleteForwardingRule(forwardingRule)
+		err := o.deleteForwardingRule(forwardingRule, false)
 		if err != nil {
 			errs = append(errs, err)
 		}
@@ -242,6 +251,36 @@ func (o *ClusterUninstaller) deleteBackendService(name string) error {
 	return nil
 }
 
+func (o *ClusterUninstaller) clearBackendServiceHealthChecks(name string) error {
+	o.Logger.Debugf("Clearing backend service %s health checks", name)
+	ctx, cancel := o.contextWithTimeout()
+	defer cancel()
+	backendService, err := o.computeSvc.RegionBackendServices.Get(o.ProjectID, o.Region, name).Context(ctx).Do()
+	if isNotFound(err) {
+		return nil
+	}
+	if err != nil {
+		return errors.Wrapf(err, "cannot retrieve backend service %s", name)
+	}
+	if len(backendService.HealthChecks) == 0 {
+		o.Logger.Debugf("Backend service %s has no health checks to clear", name)
+	}
+	backendService.HealthChecks = []string{}
+	op, err := o.computeSvc.RegionBackendServices.Patch(o.ProjectID, o.Region, name, backendService).RequestId(o.requestID("clearbackendservice", name)).Context(ctx).Do()
+	if err != nil && !isNoOp(err) {
+		o.resetRequestID("clearbackendservice", name)
+		return errors.Wrapf(err, "failed to clear backend service %s health checks", name)
+	}
+	if op != nil && op.Status == "DONE" && isErrorStatus(op.HttpErrorStatusCode) {
+		o.resetRequestID("clearbackendservice", name)
+		return errors.Errorf("failed to clear backend service %s health checks with error: %s", name, op.HttpErrorMessage)
+	}
+	if op != nil && op.Status != "DONE" {
+		return errors.Errorf("clearing of backend service %s health checks is pending", name)
+	}
+	return nil
+}
+
 // destroyBackendServices removes backend services with a name prefixed by the
 // cluster's infra ID.
 func (o *ClusterUninstaller) destroyBackendServices() error {
@@ -284,7 +323,7 @@ func (o *ClusterUninstaller) listHealthChecks() ([]string, error) {
 	return result, nil
 }
 
-func (o *ClusterUninstaller) deleteHealthCheck(name string) error {
+func (o *ClusterUninstaller) deleteHealthCheck(name string, errorOnPending bool) error {
 	o.Logger.Debugf("Deleting health check %s", name)
 	ctx, cancel := o.contextWithTimeout()
 	defer cancel()
@@ -297,6 +336,9 @@ func (o *ClusterUninstaller) deleteHealthCheck(name string) error {
 		o.resetRequestID("healthcheck", name)
 		return errors.Errorf("failed to delete health check %s with error: %s", name, op.HttpErrorMessage)
 	}
+	if errorOnPending && op != nil && op.Status != "DONE" {
+		return errors.Errorf("deletion of health check %s is pending", name)
+	}
 	return nil
 }
 
@@ -311,7 +353,7 @@ func (o *ClusterUninstaller) destroyHealthChecks() error {
 	errs := []error{}
 	for _, healthCheck := range healthChecks {
 		found = append(found, healthCheck)
-		err := o.deleteHealthCheck(healthCheck)
+		err := o.deleteHealthCheck(healthCheck, false)
 		if err != nil {
 			errs = append(errs, err)
 		}
@@ -426,6 +468,41 @@ func (o *ClusterUninstaller) deleteTargetPool(name string) error {
 	return nil
 }
 
+func (o *ClusterUninstaller) clearTargetPoolHealthChecks(name string) error {
+	o.Logger.Debugf("Clearing target pool %s health checks", name)
+	ctx, cancel := o.contextWithTimeout()
+	defer cancel()
+	targetPool, err := o.computeSvc.TargetPools.Get(o.ProjectID, o.Region, name).Context(ctx).Do()
+	if isNotFound(err) {
+		return nil
+	}
+	if err != nil {
+		return errors.Wrapf(err, "cannot retrieve target pool %s", name)
+	}
+	if len(targetPool.HealthChecks) == 0 {
+		o.Logger.Debugf("Target pool %s has no health checks to clear", name)
+	}
+	hcRemoveRequest := &compute.TargetPoolsRemoveHealthCheckRequest{}
+	for _, hc := range targetPool.HealthChecks {
+		hcRemoveRequest.HealthChecks = append(hcRemoveRequest.HealthChecks, &compute.HealthCheckReference{
+			HealthCheck: hc,
+		})
+	}
+	op, err := o.computeSvc.TargetPools.RemoveHealthCheck(o.ProjectID, o.Region, name, hcRemoveRequest).RequestId(o.requestID("cleartargetpool", name)).Context(ctx).Do()
+	if err != nil && !isNoOp(err) {
+		o.resetRequestID("cleartargetpool", name)
+		return errors.Wrapf(err, "failed to clear target pool %s health checks", name)
+	}
+	if op != nil && op.Status == "DONE" && isErrorStatus(op.HttpErrorStatusCode) {
+		o.resetRequestID("cleartargetpool", name)
+		return errors.Errorf("failed to clear target pool %s health checks with error: %s", name, op.HttpErrorMessage)
+	}
+	if op != nil && op.Status != "DONE" {
+		return errors.Errorf("clearing of target pool %s health checks is pending", name)
+	}
+	return nil
+}
+
 // destroyTargetPools removes target pools created for external load balancers that have
 // a name that starts with the cluster infra ID. These are load balancers created by the
 // installer or cluster operators.
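
Note: deleteFirewall, deleteAddress, deleteForwardingRule and deleteHealthCheck now share a single three-way treatment of the returned *compute.Operation, and the new clearBackendServiceHealthChecks/clearTargetPoolHealthChecks helpers follow the same shape: an operation that finished with an error status is fatal; an operation that has not reached "DONE" is reported as an error only when the caller passes errorOnPending; anything else is success. The sketch below isolates that convention. It is illustrative only: fakeOperation and checkOperation are hypothetical names, and the non-zero status-code test stands in for the patch's isErrorStatus helper.

package main

import "fmt"

// fakeOperation mirrors the *compute.Operation fields the helpers inspect.
type fakeOperation struct {
	Status              string // "PENDING", "RUNNING" or "DONE"
	HttpErrorStatusCode int64
	HttpErrorMessage    string
}

// checkOperation reproduces the shared convention: a DONE operation with an
// error status is a hard failure, while a not-yet-DONE operation is an error
// only when the caller asked to be told about pending work.
func checkOperation(op *fakeOperation, resource string, errorOnPending bool) error {
	if op == nil {
		return nil
	}
	if op.Status == "DONE" && op.HttpErrorStatusCode != 0 {
		return fmt.Errorf("failed to delete %s with error: %s", resource, op.HttpErrorMessage)
	}
	if errorOnPending && op.Status != "DONE" {
		// Surfacing pending work as an error lets the destroyer's outer
		// polling loop retry the whole teardown sequence on its next pass.
		return fmt.Errorf("deletion of %s is pending", resource)
	}
	return nil
}

func main() {
	fmt.Println(checkOperation(&fakeOperation{Status: "RUNNING"}, "address my-lb", true))  // deletion of address my-lb is pending
	fmt.Println(checkOperation(&fakeOperation{Status: "RUNNING"}, "address my-lb", false)) // <nil>
	fmt.Println(checkOperation(&fakeOperation{Status: "DONE"}, "address my-lb", true))     // <nil>
}

Surfacing pending work as an error is what allows deleteInternalLoadBalancer and deleteExternalLoadBalancer (below) to abort a pass early and be retried wholesale rather than leaving half-deleted resources untracked.
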
@@ -478,7 +555,7 @@ func (o *ClusterUninstaller) deleteSubNetwork(name string) error {
 		o.resetRequestID("subnetwork", name)
 		return errors.Wrapf(err, "failed to delete subnetwork %s", name)
 	}
-	if isErrorStatus(op.HttpErrorStatusCode) {
+	if op != nil && op.Status == "DONE" && isErrorStatus(op.HttpErrorStatusCode) {
 		o.resetRequestID("subnetwork", name)
 		return errors.Errorf("failed to delete subnetwork %s with error: %s", name, op.HttpErrorMessage)
 	}
@@ -713,11 +790,13 @@ func (o *ClusterUninstaller) destroyRouters() error {
 // where clusterid is an 8-char id generated and saved in a configmap named
 // "ingress-uid" in kube-system, key "uid":
 // https://github.com/openshift/kubernetes/blob/1e5983903742f64bca36a464582178c940353e9a/pkg/cloudprovider/providers/gce/gce_clusterid.go#L210-L238
-// TODO: Have the installer generate this UID, save it to metadata and populate it in the cluster. With that, we can just directly get
-// an instanceGroup named "k8s-ig--{clusterid}"
-// For now, we look into each instance group and determine if it contains instances prefixed with the cluster's infra ID
+// If no cloudControllerUID is set, we look into each instance group and determine if it contains instances prefixed with the cluster's infra ID
 func (o *ClusterUninstaller) getInternalLBInstanceGroups() ([]nameAndZone, error) {
-	candidates, err := o.listInstanceGroupsWithFilter("name eq \"k8s-ig--.*\"")
+	filter := "name eq \"k8s-ig--.*\""
+	if len(o.cloudControllerUID) > 0 {
+		filter = fmt.Sprintf("name eq \"k8s-ig--%s\"", o.cloudControllerUID)
+	}
+	candidates, err := o.listInstanceGroupsWithFilter(filter)
 	if err != nil {
 		return nil, err
 	}
@@ -726,14 +805,18 @@ func (o *ClusterUninstaller) getInternalLBInstanceGroups() ([]nameAndZone, error)
 	}
 
 	igName := ""
-	for _, ig := range candidates {
-		instances, err := o.listInstanceGroupInstances(ig)
-		if err != nil {
-			return nil, errors.Wrapf(err, "failed to get internal LB instance group instances for %s", ig.name)
-		}
-		if o.areAllClusterInstances(instances) {
-			igName = ig.name
-			break
+	if len(o.cloudControllerUID) > 0 {
+		igName = fmt.Sprintf("k8s-ig--%s", o.cloudControllerUID)
+	} else {
+		for _, ig := range candidates {
+			instances, err := o.listInstanceGroupInstances(ig)
+			if err != nil {
+				return nil, errors.Wrapf(err, "failed to get internal LB instance group instances for %s", ig.name)
+			}
+			if o.areAllClusterInstances(instances) {
+				igName = ig.name
+				break
+			}
 		}
 	}
 	igs := []nameAndZone{}
@@ -744,7 +827,6 @@ func (o *ClusterUninstaller) getInternalLBInstanceGroups() ([]nameAndZone, error) {
 			}
 		}
 	}
-
 	return igs, nil
 }
 
@@ -766,7 +848,7 @@ func (o *ClusterUninstaller) listBackendServicesForInstanceGroups(igs []nameAndZ
 	for _, ig := range igs {
 		urls.Insert(o.getInstanceGroupURL(ig))
 	}
-	return o.listBackendServicesWithFilter("items(name,backends)", "", func(item *compute.BackendService) bool {
+	return o.listBackendServicesWithFilter("items(name,backends)", "name eq \"a[0-9a-f]{30,50}\"", func(item *compute.BackendService) bool {
 		if len(item.Backends) == 0 {
 			return false
 		}
@@ -782,26 +864,34 @@ func (o *ClusterUninstaller) listBackendServicesForInstanceGroups(igs []nameAndZ
 // deleteInternalLoadBalancer follows a similar cleanup procedure as:
 // https://github.com/openshift/kubernetes/blob/1e5983903742f64bca36a464582178c940353e9a/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go#L222
 // TODO: add cleanup for shared mode resources (determine if it's supported in 4.2)
-func (o *ClusterUninstaller) deleteInternalLoadBalancer(clusterID string, loadBalancerName string) error {
-	if err := o.deleteAddress(loadBalancerName); err != nil {
+func (o *ClusterUninstaller) deleteInternalLoadBalancer(loadBalancerName string) error {
+
+	// First, detach the health checks from the backend service so they can be
+	// deleted without deleting the backend service itself. The backend service
+	// is the resource that tells us that there is an internal load balancer
+	// pending deletion, so it must be deleted last.
+	if err := o.clearBackendServiceHealthChecks(loadBalancerName); err != nil {
 		return err
 	}
-	if err := o.deleteForwardingRule(loadBalancerName); err != nil {
+	if err := o.deleteAddress(loadBalancerName, true); err != nil {
 		return err
 	}
-	// TODO: Figure out a way to preserve the backend service name after this is
-	// gone. Otherwise we can't find this internal load balancer again. However,
-	// the backend service must be deleted first before health checks.
-	if err := o.deleteBackendService(loadBalancerName); err != nil {
+	if err := o.deleteForwardingRule(loadBalancerName, true); err != nil {
+		return err
+	}
+	if err := o.deleteHealthCheck(loadBalancerName, true); err != nil {
 		return err
 	}
-	if err := o.deleteHealthCheck(loadBalancerName); err != nil {
+	if err := o.deleteHealthCheck(loadBalancerName+"-hc", true); err != nil {
 		return err
 	}
-	if err := o.deleteHealthCheck(loadBalancerName + "-hc"); err != nil {
+	if err := o.deleteFirewall(loadBalancerName, true); err != nil {
 		return err
 	}
-	if err := o.deleteFirewall(loadBalancerName + "-hc"); err != nil {
+	if err := o.deleteFirewall(loadBalancerName+"-hc", true); err != nil {
+		return err
+	}
+	if err := o.deleteBackendService(loadBalancerName); err != nil {
 		return err
 	}
 	return nil
@@ -811,19 +901,29 @@ func (o *ClusterUninstaller) deleteInternalLoadBalancer(clusterID string, loadBa
 // https://github.com/openshift/kubernetes/blob/1e5983903742f64bca36a464582178c940353e9a/pkg/cloudprovider/providers/gce/gce_loadbalancer_external.go#L289
 // TODO: cleanup nodes health check (using clusterid)
 func (o *ClusterUninstaller) deleteExternalLoadBalancer(loadBalancerName string) error {
-	if err := o.deleteAddress(loadBalancerName); err != nil {
+
+	// Remove the health checks from the target pool so we can delete the health checks first
+	// and leave the target pool for last. The target pool is the anchor for external load
+	// balancers and without it we do not have a name to use when deleting them.
+	if err := o.clearTargetPoolHealthChecks(loadBalancerName); err != nil {
 		return err
 	}
-	if err := o.deleteForwardingRule(loadBalancerName); err != nil {
+	if err := o.deleteAddress(loadBalancerName, true); err != nil {
 		return err
 	}
-	if err := o.deleteFirewall(fmt.Sprintf("k8s-fw-%s", loadBalancerName)); err != nil {
+	if err := o.deleteForwardingRule(loadBalancerName, true); err != nil {
 		return err
 	}
-	if err := o.deleteTargetPool(loadBalancerName); err != nil {
+	if err := o.deleteFirewall(fmt.Sprintf("k8s-fw-%s", loadBalancerName), true); err != nil {
+		return err
+	}
+	if err := o.deleteFirewall(fmt.Sprintf("k8s-%s-http-hc", loadBalancerName), true); err != nil {
+		return err
+	}
+	if err := o.deleteHealthCheck(loadBalancerName, true); err != nil {
 		return err
 	}
-	if err := o.deleteHealthCheck(loadBalancerName); err != nil {
+	if err := o.deleteTargetPool(loadBalancerName); err != nil {
 		return err
 	}
 	return nil
@@ -840,9 +940,12 @@ func (o *ClusterUninstaller) destroyCloudControllerInternalLBs() error {
 		return err
 	}
 	if len(groups) == 0 {
+		o.Logger.Debugf("Did not find any internal load balancer instance groups")
 		return nil
 	}
-	clusterID := strings.TrimPrefix(groups[0].name, "k8s-ig--")
+	if o.cloudControllerUID == "" {
+		o.cloudControllerUID = strings.TrimPrefix(groups[0].name, "k8s-ig--")
+	}
 	backends, err := o.listBackendServicesForInstanceGroups(groups)
 	if err != nil {
 		return err
@@ -854,7 +957,7 @@ func (o *ClusterUninstaller) destroyCloudControllerInternalLBs() error {
 	// For each, remove related resources
 	for _, backend := range backends {
 		found = append(found, backend)
-		err := o.deleteInternalLoadBalancer(clusterID, backend)
+		err := o.deleteInternalLoadBalancer(backend)
 		if err != nil {
 			errs = append(errs, err)
 		}
@@ -880,6 +983,14 @@ func (o *ClusterUninstaller) destroyCloudControllerInternalLBs() error {
 	for _, item := range deleted {
 		o.Logger.Infof("Deleted instance group %s", item)
 	}
+	if len(o.cloudControllerUID) > 0 {
+		if err := o.deleteHealthCheck(fmt.Sprintf("k8s-%s-node", o.cloudControllerUID), true); err != nil {
+			return err
+		}
+		if err := o.deleteFirewall(fmt.Sprintf("k8s-%s-node-http-hc", o.cloudControllerUID), true); err != nil {
+			return err
+		}
+	}
 	return aggregateError(errs, len(found))
 }
 
@@ -922,5 +1033,13 @@ func (o *ClusterUninstaller) destroyCloudControllerExternalLBs() error {
 	for _, item := range deleted {
 		o.Logger.Infof("Deleted external load balancer %s", item)
 	}
+	if len(o.cloudControllerUID) != 0 {
+		if err := o.deleteHealthCheck(fmt.Sprintf("k8s-%s-node-hc", o.cloudControllerUID), true); err != nil {
+			return err
+		}
+		if err := o.deleteFirewall(fmt.Sprintf("k8s-%s-node-http-hc", o.cloudControllerUID), true); err != nil {
+			return err
+		}
+	}
 	return aggregateError(errs, len(found))
 }
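
Note: deleteInternalLoadBalancer and deleteExternalLoadBalancer now follow one shape: detach the health checks from the anchor resource (backend service or target pool), delete every dependent resource with errorOnPending set, and delete the anchor last, so an interrupted pass still leaves a name by which the load balancer can be re-discovered and retried. Below is a minimal, self-contained sketch of that ordering under hypothetical names (step, teardownLoadBalancer); it illustrates the pattern rather than reusing the patch's methods.

package main

import "fmt"

// step is one named teardown action; the real code calls the o.delete* and
// o.clear* methods from the patch above.
type step struct {
	name string
	run  func() error
}

// teardownLoadBalancer runs the steps in order and stops at the first error.
// Because pending operations are surfaced as errors, an incomplete pass keeps
// the anchor resource, and the destroyer's next polling pass retries from the top.
func teardownLoadBalancer(steps []step) error {
	for _, s := range steps {
		if err := s.run(); err != nil {
			return fmt.Errorf("%s: %w", s.name, err)
		}
	}
	return nil
}

func main() {
	ok := func() error { return nil }
	internal := []step{
		{"clear backend service health checks", ok},
		{"delete address", ok},
		{"delete forwarding rule", ok},
		{"delete health checks", ok},
		{"delete firewalls", ok},
		{"delete backend service (anchor, last)", ok},
	}
	fmt.Println(teardownLoadBalancer(internal)) // <nil>
}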