Skip to content

Commit

Permalink
Dev/robin/9913 handle ratelimiting gracefully (#28)
Browse files Browse the repository at this point in the history
* tests: ability to retain pre-images for test local reader context

* fix: avoid masking 429 errors (and others) in verifyContext

Also add support for detecting 429 errors from the blobs sdk and
obtaining the Retry-After header value as a time.Duration

AB#9913

* fix: linter issues

* fix: review comment finish the comment

* fix: explain why a range check is  used in the tests for IsRateLimiting

---------

Co-authored-by: Robin Bryce <[email protected]>
  • Loading branch information
robinbryce and Robin Bryce authored Sep 19, 2024
1 parent 1fa5904 commit ca5d457
Show file tree
Hide file tree
Showing 5 changed files with 284 additions and 65 deletions.
129 changes: 129 additions & 0 deletions massifs/bloberrors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
package massifs

import (
"errors"
"fmt"
"net/http"
"strconv"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
azStorageBlob "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
)

const (
azblobBlobNotFound = "BlobNotFound"
)

func AsStorageError(err error) (azStorageBlob.StorageError, bool) {
serr := &azStorageBlob.StorageError{}
//nolint
ierr, ok := err.(*azStorageBlob.InternalError)
if ierr == nil || !ok {
return azStorageBlob.StorageError{}, false
}
if !ierr.As(&serr) {
return azStorageBlob.StorageError{}, false
}
return *serr, true
}

func AsResponseError(err error) (azcore.ResponseError, bool) {

var ok bool
var rerr *azcore.ResponseError

//nolint
if rerr, ok = err.(*azcore.ResponseError); ok {
return *rerr, true
}

// check for an InternalError that has ResponseError as its cause
rerr = &azcore.ResponseError{}

//nolint
ierr, ok := err.(*azStorageBlob.InternalError)
if ierr == nil || !ok {
return azcore.ResponseError{}, false
}
if !ierr.As(&rerr) {
return azcore.ResponseError{}, false
}
return *rerr, true
}

// WrapBlobNotFound tranlsates the err to ErrBlobNotFound if the actual error is
// the azure sdk blob not found error. In all cases where the original err is
// not the azure BlobNot found, the original err is returned as is. Including
// the case where it is nil
func WrapBlobNotFound(err error) error {
if err == nil {
return nil
}
serr, ok := AsStorageError(err)
if !ok {
return err
}
if serr.ErrorCode != azblobBlobNotFound {
return err
}
return fmt.Errorf("%s: %w", err.Error(), ErrBlobNotFound)
}

func IsBlobNotFound(err error) bool {
if err == nil {
return false
}
if errors.Is(err, ErrBlobNotFound) {
return true
}
serr, ok := AsStorageError(err)
if !ok {
return false
}
if serr.ErrorCode != azblobBlobNotFound {
return false
}
return true
}

// IsRateLimiting detects if the error is HTTP Status 429 Too Many Requests
// The recomended wait time is returned if it is available. If the returned wait
// time is zero, the caller should apply an appropriate default backoff.
func IsRateLimiting(err error) (time.Duration, bool) {
if err == nil {
return 0, false
}
rerr, ok := AsResponseError(err)
if !ok {
return 0, false
}
if rerr.StatusCode != http.StatusTooManyRequests {
return 0, false
}

// It is a 429, check if there is a Retry-After header and return the indicated time if possible.

// Retry-After header is optional, if it is not present, the caller should still see it as a 429
if rerr.RawResponse == nil {
return 0, true
}
retryAfter := rerr.RawResponse.Header.Get("Retry-After")
if retryAfter == "" {
return 0, true
}

// Try to parse Retry-After as an integer (seconds)
if seconds, err := strconv.Atoi(retryAfter); err == nil {
return time.Duration(seconds) * time.Second, true
}

// Try to parse Retry-After as a date
if retryTime, err := http.ParseTime(retryAfter); err == nil {
retryTime = retryTime.In(time.UTC) // crucial, as Until does not work with different locations
return time.Until(retryTime), true
}

// couldn't parse the time, but it is definitely a 429. the caller should apply an appropriate default backoff.
return 0, true
}
114 changes: 114 additions & 0 deletions massifs/bloberrors_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package massifs

import (
"errors"
"net/http"
"testing"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/stretchr/testify/assert"
)

func TestIsRateLimiting(t *testing.T) {
tests := []struct {
name string
err error
minWait time.Duration
maxWait time.Duration
expectedWait time.Duration
expectedResult bool
}{
{
name: "429 with Retry-After header as date",
err: &azcore.ResponseError{
StatusCode: http.StatusTooManyRequests,
RawResponse: &http.Response{
Header: http.Header{
"Retry-After": []string{time.Now().Add(5 * time.Minute).UTC().Format(http.TimeFormat)},
},
},
},
minWait: 4*time.Minute + 59*time.Second,
maxWait: 6 * time.Minute, // remove the max wait if this turnes out to be flaky but it should be fine
expectedResult: true,
},
{
name: "nil error",
err: nil,
expectedWait: 0,
expectedResult: false,
},
{
name: "non-response error",
err: errors.New("some error"),
expectedWait: 0,
expectedResult: false,
},
{
name: "non-429 status code",
err: &azcore.ResponseError{
StatusCode: http.StatusInternalServerError,
},
expectedWait: 0,
expectedResult: false,
},
{
name: "429 without Retry-After header",
err: &azcore.ResponseError{
StatusCode: http.StatusTooManyRequests,
RawResponse: &http.Response{
Header: http.Header{},
},
},
expectedWait: 0,
expectedResult: true,
},
{
name: "429 with Retry-After header in seconds",
err: &azcore.ResponseError{
StatusCode: http.StatusTooManyRequests,
RawResponse: &http.Response{
Header: http.Header{
"Retry-After": []string{"10"},
},
},
},
expectedWait: 10 * time.Second,
expectedResult: true,
},
{
name: "429 with invalid Retry-After header",
err: &azcore.ResponseError{
StatusCode: http.StatusTooManyRequests,
RawResponse: &http.Response{
Header: http.Header{
"Retry-After": []string{"invalid"},
},
},
},
expectedWait: 0,
expectedResult: true,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {

wait, result := IsRateLimiting(tt.err)
// Note: we use a range check here because parsing the Retry-After header
// uses time.Until, which then calls time.Now, in the case where the header is a date.
// this means this test would be flaky. We could mock time.Now but that would be a lot of work
if tt.minWait > 0 {
assert.GreaterOrEqual(t, wait, tt.minWait)
}
if tt.maxWait > 0 {
assert.LessOrEqual(t, wait, tt.maxWait)
}
if tt.expectedWait > 0 || (tt.minWait == 0 && tt.maxWait == 0) {
assert.Equal(t, tt.expectedWait, wait)
}
assert.Equal(t, tt.expectedResult, result)
})
}
}
60 changes: 0 additions & 60 deletions massifs/blobnotfounderr.go

This file was deleted.

9 changes: 6 additions & 3 deletions massifs/massifcontextverified.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,12 @@ func (mc *MassifContext) verifyContext(

msg, state, err := options.sealGetter.GetSignedRoot(ctx, mc.TenantIdentity, mc.Start.MassifIndex)
if err != nil {
return nil, fmt.Errorf(
"%w: failed to get seal for massif %d for tenant %s: %v",
ErrSealNotFound, mc.Start.MassifIndex, mc.TenantIdentity, WrapBlobNotFound(err))
if IsBlobNotFound(err) {
return nil, fmt.Errorf(
"%w: failed to get seal for massif %d for tenant %s: %v",
ErrSealNotFound, mc.Start.MassifIndex, mc.TenantIdentity, WrapBlobNotFound(err))
}
return nil, err
}

state.Root, err = mmr.GetRoot(state.MMRSize, mc, sha256.New())
Expand Down
37 changes: 35 additions & 2 deletions massifs/testlocalreadercontext.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,42 @@ type TestLocalReaderContext struct {
AzuriteReader MassifReader
}

// TestLogCreatorContext holds the context data resulting from a call to CreateLog
// Unless one or more of the TEstCreateLogOptions are used, the context will not have anything interesting in it.
type TestLogCreatorContext struct {
Preimages map[uint64][]byte
}

type TestCreateLogOption func(*TestLogCreatorContext)

func TestWithCreateLogPreImages() TestCreateLogOption {
return func(c *TestLogCreatorContext) {
c.Preimages = make(map[uint64][]byte)
}
}

// CreateLog creates a log with the given tenant identity, massif height, and mmr size,
// any previous seal or massif blobs for the same tenant are first deleted
func (c *TestLocalReaderContext) CreateLog(tenantIdentity string, massifHeight uint8, massifCount uint32) {
func (c *TestLocalReaderContext) CreateLog(
tenantIdentity string, massifHeight uint8, massifCount uint32,
opts ...TestCreateLogOption) {

logContext := &TestLogCreatorContext{}
for _, opt := range opts {
opt(logContext)
}

generator := MMRTestingGenerateNumberedLeaf

// If the caller needs to work with the pre-images we wrap the generator to retain them
if logContext.Preimages != nil {
generator = func(tenantIdentity string, base, i uint64) mmrtesting.AddLeafArgs {

args := generator(tenantIdentity, base, i)
logContext.Preimages[base+i] = args.Value
return args
}
}

// clear out any previous log
c.AzuriteContext.DeleteBlobsByPrefix(TenantMassifPrefix(tenantIdentity))
Expand All @@ -35,7 +68,7 @@ func (c *TestLocalReaderContext) CreateLog(tenantIdentity string, massifHeight u
CommitmentEpoch: 1,
MassifHeight: massifHeight,
SealOnCommit: true, // create seals for each massif as we go
}, c.AzuriteContext, c.G, MMRTestingGenerateNumberedLeaf)
}, c.AzuriteContext, c.G, generator)
require.NoError(c.AzuriteContext.T, err)

leavesPerMassif := mmr.HeightIndexLeafCount(uint64(massifHeight) - 1)
Expand Down

0 comments on commit ca5d457

Please sign in to comment.