-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3535 from abhinavdahiya/tf_diagnose
Bug 1837564: pkg/terraform: add diagnostics errors for terraform apply operations
- Loading branch information
Showing
4 changed files
with
276 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
package diagnostics | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"io" | ||
"regexp" | ||
"strings" | ||
|
||
"github.com/pkg/errors" | ||
) | ||
|
||
// Err is an error enriched with diagnostics information. Besides the
// underlying error it carries a source, a reason and a message, which
// together allow much better error reporting to users.
type Err struct {
	// Orig is the underlying error being wrapped; it may be nil.
	Orig error

	// Source names the entity that is generating the error, so that
	// information about where the error comes from can be passed along.
	// For example, the Asset.
	Source string

	// Reason is a CamelCase string that summarizes the error in one word,
	// which makes it easy to categorize known errors.
	Reason string

	// Message is a free-form string that provides important details or
	// diagnostics for the error. Keep in mind that the audience for the
	// message is end-users who might not be experts.
	Message string
}
|
||
// Unwrap allows the error to be unwrapped. | ||
func (e *Err) Unwrap() error { return e.Orig } | ||
|
||
// Error returns a string representation of the Err. The returned value | ||
// is expected to be a single value. | ||
// The format of the error string returned is, | ||
// `error(<Reason>) from <Source>: <Message>: <Cause of Orig>` | ||
func (e *Err) Error() string { | ||
buf := &bytes.Buffer{} | ||
if len(e.Source) > 0 { | ||
fmt.Fprintf(buf, "error(%s) from %s", e.Reason, e.Source) | ||
} else { | ||
fmt.Fprintf(buf, "error(%s)", e.Reason) | ||
} | ||
if msg := strings.TrimSpace(e.Message); len(msg) > 0 { | ||
msg = breakre.ReplaceAllString(msg, " ") | ||
fmt.Fprintf(buf, ": %s", msg) | ||
} | ||
if c := errors.Cause(e.Orig); c != nil { | ||
fmt.Fprintf(buf, ": %s", errors.Cause(e.Orig)) | ||
} | ||
return buf.String() | ||
} | ||
|
||
// Print prints the Err to Writer in a way that is more verbose and | ||
// sectionalized. | ||
// The output looks like: | ||
// Error from <Source>: | ||
// Reason: <reason> | ||
// | ||
// Message: | ||
// <Message> | ||
// | ||
// Original: | ||
// <Orig> | ||
func (e *Err) Print(w io.Writer) { | ||
fmt.Fprintf(w, "Error from %q\n", e.Source) | ||
fmt.Fprintf(w, "Reason: %s\n", e.Reason) | ||
if len(e.Message) > 0 { | ||
fmt.Fprintf(w, "\nMessage:\n") | ||
fmt.Fprintln(w, e.Message) | ||
} | ||
fmt.Fprintf(w, "\nOriginal error:\n") | ||
fmt.Fprintln(w, e.Orig) | ||
} | ||
|
||
// breakre matches a single line break, either LF or CRLF. Error uses it to
// fold a multi-line Message onto one line.
var breakre = regexp.MustCompile(`\r?\n`)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package terraform | ||
|
||
import ( | ||
"regexp" | ||
|
||
"github.com/pkg/errors" | ||
|
||
"github.com/openshift/installer/pkg/diagnostics" | ||
) | ||
|
||
// Diagnose accepts an error from terraform runs and tries to diagnose the | ||
// underlying cause. | ||
func Diagnose(message string) error { | ||
for _, cand := range conditions { | ||
if cand.match.MatchString(message) { | ||
return &diagnostics.Err{ | ||
Source: "Infrastructure Provider", | ||
Reason: cand.reason, | ||
Message: cand.message, | ||
} | ||
} | ||
} | ||
|
||
return errors.New("failed to complete the change") | ||
} | ||
|
||
// condition ties a pattern for terraform error output to the diagnosis that
// is reported when the pattern matches.
type condition struct {
	// match is the pattern tested against the terraform error message.
	match *regexp.Regexp

	// reason is the one-word summary reported for a match.
	reason string
	// message is the detailed, user-facing text reported for a match.
	message string
}
|
||
// conditions is a list matches for the error string from terraform. | ||
// specific on the top, generic matches on the bottom. | ||
var conditions = []condition{{ | ||
match: regexp.MustCompile(`Error: Error creating Blob .*: Error copy/waiting`), | ||
|
||
reason: "Timeout", | ||
message: `Copying the VHD to user environment was too slow, and timeout was reached for the success.`, | ||
}, { | ||
match: regexp.MustCompile(`Error: Error Creating/Updating Subnet .*: network.SubnetsClient#CreateOrUpdate: .* Code="AnotherOperationInProgress" Message="Another operation on this or dependent resource is in progress`), | ||
|
||
reason: "AzureMultiOperationFailure", | ||
message: `Creating Subnets failed because Azure could not process multiple operations.`, | ||
}, { | ||
match: regexp.MustCompile(`Error: Error Creating/Updating Public IP .*: network.PublicIPAddressesClient#CreateOrUpdate: .* Code="PublicIPCountLimitReached" Message="Cannot create more than .* public IP addresses for this subscription in this region`), | ||
|
||
reason: "AzureQuotaLimitExceeded", | ||
message: `Service limits exceeded for Public IPs in the the subscriptions for the region. Requesting increase in quota should fix the error.`, | ||
}, { | ||
match: regexp.MustCompile(`Error: compute\.VirtualMachinesClient#CreateOrUpdate: .* Code="OperationNotAllowed" Message="Operation could not be completed as it results in exceeding approved Total Regional Cores quota`), | ||
|
||
reason: "AzureQuotaLimitExceeded", | ||
message: `Service limits exceeded for Virtual Machine cores in the the subscriptions for the region. Requesting increase in quota should fix the error.`, | ||
}, { | ||
match: regexp.MustCompile(`Error: Code="OSProvisioningTimedOut"`), | ||
|
||
reason: "AzureVirtualMachineFailure", | ||
message: `Some virtual machines failed to provision in alloted time. Virtual machines can fail to provision if the bootstap virtual machine has failing services.`, | ||
}, { | ||
match: regexp.MustCompile(`Status=404 Code="ResourceGroupNotFound"`), | ||
|
||
reason: "AzureEventualConsistencyFailure", | ||
message: `Failed to find a resource that was recently created usualy caused by Azure's eventual consistency delays.`, | ||
}, { | ||
match: regexp.MustCompile(`Error: Error applying IAM policy to project .*: Too many conflicts`), | ||
|
||
reason: "GCPTooManyIAMUpdatesInFlight", | ||
message: `There are a lot of IAM updates to the project in flight. Failed after reaching a limit of read-modify-write on conflict backoffs.`, | ||
}, { | ||
match: regexp.MustCompile(`Error: .*: googleapi: Error 503: .*, backendError`), | ||
|
||
reason: "GCPBackendInternalError", | ||
message: `GCP is experiencing backend service interuptions. Please try again or contact Google Support`, | ||
}, { | ||
match: regexp.MustCompile(`Error: Error waiting for instance to create: Internal error`), | ||
|
||
reason: "GCPComputeBackendTimeout", | ||
message: `GCP is experiencing backend service interuptions, the compute instance failed to create in reasonable time.`, | ||
}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
package terraform | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestDiagnose(t *testing.T) { | ||
cases := []struct { | ||
input string | ||
err string | ||
}{{ | ||
input: `Error: Error creating Blob "rhcoskltwa.vhd" (Container "vhd" / Account "clusterkltwa"): Error copy/waiting: | ||
on ../tmp/openshift-install-348626978/main.tf line 169, in resource "azurerm_storage_blob" "rhcos_image":" | ||
169: resource "azurerm_storage_blob" "rhcos_image" { | ||
`, | ||
err: `error\(Timeout\) from Infrastructure Provider: Copying the VHD to user environment was too slow, and timeout was reached for the success\.`, | ||
}, { | ||
input: `Error: Error Creating/Updating Subnet "xxxx-master-subnet" (Virtual Network "xxxx-vnet" / Resource Group "xxxx-rg"): network.SubnetsClient#CreateOrUpdate: Failure sending request: StatusCode=0 -- Original Error: autorest/azure: Service returned an error. Status=<nil> Code="AnotherOperationInProgress" Message="Another operation on this or dependent resource is in progress. To retrieve status of the operation use uri: https://management.azure.com/subscriptions/d38f1e38-4bed-438e-b227-833f997adf6a/providers/Microsoft.Network/locations/eastus2/operations/62c8a417-7168-464f-83e6-96912bd6b30a?api-version=2019-09-01." Details=[] | ||
on ../tmp/openshift-install-513947104/vnet/vnet.tf line 10, in resource "azurerm_subnet" "master_subnet":" | ||
10: resource "azurerm_subnet" "master_subnet" { | ||
`, | ||
err: `error\(AzureMultiOperationFailure\) from Infrastructure Provider: Creating Subnets failed because Azure could not process multiple operations\.`, | ||
}, { | ||
input: `Error: Error Creating/Updating Public IP "xxxx-bootstrap-pip-v4" (Resource Group "xxxx-rg"): network.PublicIPAddressesClient#CreateOrUpdate: Failure sending request: StatusCode=400 -- Original Error: Code="PublicIPCountLimitReached" Message="Cannot create more than 50 public IP addresses for this subscription in this region." Details=[] | ||
on ../tmp/openshift-install-172932975/bootstrap/main.tf line 65, in resource "azurerm_public_ip" "bootstrap_public_ip_v4": | ||
65: resource "azurerm_public_ip" "bootstrap_public_ip_v4" { | ||
`, | ||
|
||
err: `error\(AzureQuotaLimitExceeded\) from Infrastructure Provider: Service limits exceeded for Public IPs in the the subscriptions for the region. Requesting increase in quota should fix the error\.`, | ||
}, { | ||
input: `Error: Code="OSProvisioningTimedOut" Message="OS Provisioning for VM 'xxxx-master-2' did not finish in the allotted time. The VM may still finish provisioning successfully. Please check provisioning state later. Also, make sure the image has been properly prepared (generalized).\\r\\n * Instructions for Windows: https://azure.microsoft.com/documentation/articles/virtual-machines-windows-upload-image/ \\r\\n * Instructions for Linux: https://azure.microsoft.com/documentation/articles/virtual-machines-linux-capture-image/ " | ||
on ../tmp/openshift-install-172932975/master/master.tf line 81, in resource "azurerm_virtual_machine" "master": | ||
81: resource "azurerm_virtual_machine" "master" { | ||
`, | ||
|
||
err: `error\(AzureVirtualMachineFailure\) from Infrastructure Provider: Some virtual machines failed to provision in alloted time`, | ||
}, { | ||
input: ` | ||
Error: Error waiting for instance to create: Internal error. Please try again or contact Google Support. (Code: '8712799794455203922') | ||
on ../tmp/openshift-install-910996711/master/main.tf line 31, in resource "google_compute_instance" "master": | ||
31: resource "google_compute_instance" "master" { | ||
`, | ||
|
||
err: `error\(GCPComputeBackendTimeout\) from Infrastructure Provider: GCP is experiencing backend service interuptions, the compute instance failed to create in reasonable time\.`, | ||
}, { | ||
input: `Error: Error reading Service Account "projects/project-id/serviceAccounts/[email protected]": googleapi: Error 503: The service is currently unavailable., backendError`, | ||
|
||
err: `error\(GCPBackendInternalError\) from Infrastructure Provider: GCP is experiencing backend service interuptions. Please try again or contact Google Support`, | ||
}, { | ||
input: ` | ||
Error: Error adding instances to InstanceGroup: googleapi: Error 503: Internal error. Please try again or contact Google Support. (Code: 'xxxx'), backendError | ||
on ../tmp/openshift-install-267295217/bootstrap/main.tf line 87, in resource "google_compute_instance_group" "bootstrap": | ||
87: resource "google_compute_instance_group" "bootstrap" { | ||
`, | ||
|
||
err: `error\(GCPBackendInternalError\) from Infrastructure Provider: GCP is experiencing backend service interuptions. Please try again or contact Google Support`, | ||
}, { | ||
input: ` | ||
Error: Error applying IAM policy to project "project-id": Too many conflicts. Latest error: Error setting IAM policy for project "project-id": googleapi: Error 409: There were concurrent policy changes. Please retry the whole read-modify-write with exponential backoff., aborted | ||
on ../tmp/openshift-install-392130810/master/main.tf line 26, in resource "google_project_iam_member" "master-service-account-user": | ||
26: resource "google_project_iam_member" "master-service-account-user" { | ||
`, | ||
|
||
err: `error\(GCPTooManyIAMUpdatesInFlight\) from Infrastructure Provider: There are a lot of IAM updates to the project in flight. Failed after reaching a limit of read-modify-write on conflict backoffs\.`, | ||
}, { | ||
input: ` | ||
Error: Error retrieving resource group: resources.GroupsClient#Get: Failure responding to request: StatusCode=404 -- Original Error: autorest/azure: Service returned an error. Status=404 Code="ResourceGroupNotFound" Message="Resource group 'xxxxx-rg' could not be found." | ||
on ../tmp/openshift-install-424775273/main.tf line 124, in resource "azurerm_resource_group" "main": | ||
124: resource "azurerm_resource_group" "main" { | ||
`, | ||
|
||
err: `error\(AzureEventualConsistencyFailure\) from Infrastructure Provider: Failed to find a resource that was recently created usualy caused by Azure's eventual consistency delays\.`, | ||
}, { | ||
input: ` | ||
Error: compute.VirtualMachinesClient#CreateOrUpdate: Failure sending request: StatusCode=0 -- Original Error: autorest/azure: Service returned an error. Status=<nil> Code="OperationNotAllowed" Message="Operation could not be completed as it results in exceeding approved Total Regional Cores quota. Additional details - Deployment Model: Resource Manager, Location: centralus, Current Limit: 200, Current Usage: 198, Additional Required: 8, (Minimum) New Limit Required: 206. Submit a request for Quota increase at https://aka.ms/ProdportalCRP/?#create/Microsoft.Support/Parameters/%7B%22subId%22:%225f675811-04fa-483f-9709-ffd8a9da03f0%22,%22pesId%22:%2206bfd9d3-516b-d5c6-5802-169c800dec89%22,%22supportTopicId%22:%22e12e3d1d-7fa0-af33-c6d0-3c50df9658a3%22%7D by specifying parameters listed in the ‘Details’ section for deployment to succeed. Please read more about quota limits at https://docs.microsoft.com/en-us/azure/azure-supportability/regional-quota-requests." | ||
on ../../../../tmp/openshift-install-941329162/master/master.tf line 81, in resource "azurerm_virtual_machine" "master": | ||
81: resource "azurerm_virtual_machine" "master" { | ||
`, | ||
|
||
err: `error\(AzureQuotaLimitExceeded\) from Infrastructure Provider: Service limits exceeded for Virtual Machine cores in the the subscriptions for the region\. Requesting increase in quota should fix the error\.`, | ||
}} | ||
|
||
for _, test := range cases { | ||
t.Run("", func(t *testing.T) { | ||
err := Diagnose(test.input) | ||
if test.err == "" { | ||
assert.NoError(t, err) | ||
} else { | ||
assert.Regexp(t, test.err, err) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters