diff --git a/sdk/client/client.go b/sdk/client/client.go index 6429116fecf..5518f0c97cd 100644 --- a/sdk/client/client.go +++ b/sdk/client/client.go @@ -9,6 +9,7 @@ import ( "crypto/tls" "encoding/json" "encoding/xml" + "errors" "fmt" "io" "log" @@ -16,6 +17,7 @@ import ( "net" "net/http" "net/url" + "regexp" "runtime" "strconv" "strings" @@ -443,16 +445,8 @@ func (c *Client) Execute(ctx context.Context, req *Request) (*Response, error) { // Instantiate a RetryableHttp client and configure its CheckRetry func r := c.retryableClient(ctx, func(ctx context.Context, r *http.Response, err error) (bool, error) { - // First check for badly malformed responses - if r == nil { - if req.IsIdempotent() { - return true, nil - } - return false, fmt.Errorf("HTTP response was nil; connection may have been reset") - } - // Eventual consistency checks - if !c.DisableRetries { + if r != nil && !c.DisableRetries { if r.StatusCode == http.StatusFailedDependency { return true, nil } @@ -474,6 +468,14 @@ func (c *Client) Execute(ctx context.Context, req *Request) (*Response, error) { } } + // Check for failed connections etc and decide if retries are appropriate + if r == nil { + if req.IsIdempotent() { + return extendedRetryPolicy(r, err) + } + return false, fmt.Errorf("HTTP response was nil; connection may have been reset") + } + // Fall back to default retry policy to handle rate limiting, server errors etc. return retryablehttp.DefaultRetryPolicy(ctx, r, err) }) @@ -749,3 +751,93 @@ func containsStatusCode(expected []int, actual int) bool { return false } + +// extendedRetryPolicy extends the defaultRetryPolicy implementation in go-retryablehhtp with +// additional error conditions that should not be retried indefinitely +// `tcpDialTimeoutRe` - covers the +func extendedRetryPolicy(resp *http.Response, err error) (bool, error) { + // A regular expression to match the error returned by net/http when the + // configured number of redirects is exhausted. This error isn't typed + // specifically so we resort to matching on the error string. + redirectsErrorRe := regexp.MustCompile(`stopped after \d+ redirects\z`) + + // A regular expression to match the error returned by net/http when the + // scheme specified in the URL is invalid. This error isn't typed + // specifically so we resort to matching on the error string. + schemeErrorRe := regexp.MustCompile(`unsupported protocol scheme`) + + // A regular expression to match the error returned by net/http when a + // request header or value is invalid. This error isn't typed + // specifically so we resort to matching on the error string. + invalidHeaderErrorRe := regexp.MustCompile(`invalid header`) + + // A regular expression to match the error returned by net/http when the + // TLS certificate is not trusted. This error isn't typed + // specifically so we resort to matching on the error string. + notTrustedErrorRe := regexp.MustCompile(`certificate is not trusted`) + + // A regular expression to catch dial timeouts in the underlying TCP session + // connection + tcpDialTimeoutRe := regexp.MustCompile(`dial tcp .*: i/o timeout`) + + // A regular expression to match complete packet loss + completePacketLossRe := regexp.MustCompile(`EOF$`) + + if err != nil { + var v *url.Error + if errors.As(err, &v) { + // Don't retry if the error was due to too many redirects. + if redirectsErrorRe.MatchString(v.Error()) { + return false, v + } + + // Don't retry if the error was due to an invalid protocol scheme. + if schemeErrorRe.MatchString(v.Error()) { + return false, v + } + + // Don't retry if the error was due to an invalid header. + if invalidHeaderErrorRe.MatchString(v.Error()) { + return false, v + } + + // Don't retry if the error was due to TLS cert verification failure. + if notTrustedErrorRe.MatchString(v.Error()) { + return false, v + } + + if tcpDialTimeoutRe.MatchString(v.Error()) { + return false, v + } + + if completePacketLossRe.MatchString(v.Error()) { + return false, v + } + + var certificateVerificationError *tls.CertificateVerificationError + if ok := errors.As(v.Err, &certificateVerificationError); !ok { + return false, v + } + } + + // The error is likely recoverable so retry. + return true, nil + } + + // 429 Too Many Requests is recoverable. Sometimes the server puts + // a Retry-After response header to indicate when the server is + // available to start processing request from client. + if resp.StatusCode == http.StatusTooManyRequests { + return true, nil + } + + // Check the response code. We retry on 500-range responses to allow + // the server time to recover, as 500's are typically not permanent + // errors and may relate to outages on the server side. This will catch + // invalid response codes as well, like 0 and 999. + if resp.StatusCode == 0 || (resp.StatusCode >= 500 && resp.StatusCode != http.StatusNotImplemented) { + return true, fmt.Errorf("unexpected HTTP status %s", resp.Status) + } + + return false, nil +}