Merge pull request #123 from RachelTucker/get-in-order-larger-than-cache
OTHER: create helper to retrieve objects in-order that exceed available cache
LogicalChaos authored Mar 19, 2021
2 parents 6f15864 + b3e8c9c commit 990cc35
Showing 4 changed files with 414 additions and 4 deletions.
5 changes: 2 additions & 3 deletions helpers/getTransfernator.go
@@ -56,7 +56,7 @@ func newBulkGetRequest(bucketName string, readObjects *[]helperModels.GetObject,
 func createPartialGetObjects(getObject helperModels.GetObject) []ds3Models.Ds3GetObject {
     // handle getting the entire object
     if len(getObject.Ranges) == 0 {
-        return []ds3Models.Ds3GetObject { { Name:getObject.Name }, }
+        return []ds3Models.Ds3GetObject { { Name:getObject.Name } }
     }
     // handle partial object retrieval
     var partialObjects []ds3Models.Ds3GetObject
@@ -123,11 +123,10 @@ func (transceiver *getTransceiver) transfer() (string, error) {
     consumer := newConsumer(&queue, &waitGroup, transceiver.Strategy.BlobStrategy.maxConcurrentTransfers(), doneNotifier)

     // Wait for completion of producer-consumer goroutines
-    var aggErr ds3Models.AggregateError
     waitGroup.Add(1) // adding producer and consumer goroutines to wait group
     go consumer.run()
     err = producer.run() // producer will add to waitGroup for every blob retrieval added to queue, and each transfer performed will decrement from waitGroup
     waitGroup.Wait()

-    return bulkGetResponse.MasterObjectList.JobId, aggErr.GetErrors()
+    return bulkGetResponse.MasterObjectList.JobId, nil
 }
127 changes: 127 additions & 0 deletions helpers/helpersImpl.go
@@ -1,8 +1,14 @@
 package helpers

 import (
+    "fmt"
     "github.com/SpectraLogic/ds3_go_sdk/ds3"
+    "github.com/SpectraLogic/ds3_go_sdk/ds3/models"
     helperModels "github.com/SpectraLogic/ds3_go_sdk/helpers/models"
+    "github.com/SpectraLogic/ds3_go_sdk/helpers/ranges"
+    "net/http"
+    "sort"
+    "strings"
 )

 type HelperInterface interface {
@@ -20,6 +26,12 @@ type HelperInterface interface {
     // A job ID will be returned if a BP job was successfully created, regardless of
     // whether additional errors occur.
     GetObjects(bucketName string, objects []helperModels.GetObject, strategy ReadTransferStrategy) (string, error)
+
+    // Retrieves the list of objects from the specified bucket on the Black Pearl.
+    // If a get job cannot be created due to insufficient cache space to fulfill an
+    // IN_ORDER processing guarantee, then the job is split across multiple BP jobs.
+    // This allows for the IN_ORDER retrieval of objects that exceed available cache space.
+    GetObjectsSpanningJobs(bucketName string, objects []helperModels.GetObject, strategy ReadTransferStrategy) ([]string, error)
 }

 type HelperImpl struct {
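
For context, a minimal caller sketch of the new method. Nothing below is part of this change: the restoreInOrder wrapper, the bucket name, and the assumption that helper, objects, and strategy are prepared exactly as for a plain GetObjects call are all illustrative.

package example

import (
    "fmt"
    "log"

    "github.com/SpectraLogic/ds3_go_sdk/helpers"
    helperModels "github.com/SpectraLogic/ds3_go_sdk/helpers/models"
)

// restoreInOrder is a hypothetical wrapper: helper, objects, and strategy are
// assumed to be built the same way as for an ordinary GetObjects call.
func restoreInOrder(helper helpers.HelperInterface, objects []helperModels.GetObject, strategy helpers.ReadTransferStrategy) {
    jobIds, err := helper.GetObjectsSpanningJobs("my-bucket", objects, strategy)
    if err != nil {
        // GetObjectsSpanningJobs only returns an error when the failure is
        // unrelated to IN_ORDER cache pre-allocation; pre-allocation failures
        // instead fall back to one BP job per blob.
        log.Fatal(err)
    }
    // One ID if the whole request fit in a single BP job, otherwise one per job.
    fmt.Println("BP job IDs:", jobIds)
}
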
@@ -49,3 +61,118 @@ func (helper *HelperImpl) GetObjects(bucketName string, objects []helperModels.G
     transceiver := newGetTransceiver(bucketName, &objects, &strategy, helper.client)
     return transceiver.transfer()
 }
+
+func (helper *HelperImpl) GetObjectsSpanningJobs(bucketName string, objects []helperModels.GetObject, strategy ReadTransferStrategy) ([]string, error) {
+    // Attempt to send the entire job at once
+    jobId, err := helper.GetObjects(bucketName, objects, strategy)
+    if err == nil {
+        // success
+        return []string{jobId}, nil
+    } else if !helper.isCannotPreAllocateError(err) {
+        // error not related to pre-allocation
+        return nil, err
+    }
+
+    // Retrieve each file individually
+    var jobIds []string
+    for _, getObject := range objects {
+        fileJobIds := helper.retrieveIndividualFile(bucketName, getObject, strategy)
+        jobIds = append(jobIds, fileJobIds...)
+    }
+    return jobIds, nil
+}
+
+func (helper *HelperImpl) isCannotPreAllocateError(err error) bool {
+    badStatusErr, ok := err.(*models.BadStatusCodeError)
+    if !ok || badStatusErr.ActualStatusCode != http.StatusServiceUnavailable {
+        // failed to create bulk get for reason other than 503
+        return false
+    }
+
+    if strings.Contains(badStatusErr.Error(), "GET jobs that have a chunkClientProcessingOrderGuarantee of IN_ORDER must be entirely pre-allocated. Cannot pre-allocate") {
+        return true
+    }
+    return false
+}
+
+func (helper *HelperImpl) retrieveIndividualFile(bucketName string, getObject helperModels.GetObject, strategy ReadTransferStrategy) []string {
+    // Get the blob offsets
+    headObject, err := helper.client.HeadObject(models.NewHeadObjectRequest(bucketName, getObject.Name))
+    if err != nil {
+        getObject.ChannelBuilder.SetFatalError(err)
+        return nil
+    }
+    var offsets []int64
+    for offset := range headObject.BlobChecksums {
+        offsets = append(offsets, offset)
+    }
+
+    sort.Slice(offsets, func(i, j int) bool {
+        return offsets[i] < offsets[j]
+    })
+
+    // Get the object size
+    objectsDetails, err := helper.client.GetObjectsWithFullDetailsSpectraS3(
+        models.NewGetObjectsWithFullDetailsSpectraS3Request().
+            WithBucketId(bucketName).WithName(getObject.Name).
+            WithLatest(true))
+
+    if err != nil {
+        getObject.ChannelBuilder.SetFatalError(err)
+        return nil
+    } else if len(objectsDetails.DetailedS3ObjectList.DetailedS3Objects) < 1 {
+        getObject.ChannelBuilder.SetFatalError(fmt.Errorf("failed to get object details"))
+        return nil
+    }
+
+    // Retrieve the object one blob at a time in order
+    objectCopy := getObject
+    objectEnd := objectsDetails.DetailedS3ObjectList.DetailedS3Objects[0].Size - 1
+    if len(objectCopy.Ranges) == 0 {
+        // If the user didn't specify a range, add a range that covers the entire file
+        // so that we can use the blobRangeFinder to tell us what ranges to specify.
+        objectCopy.Ranges = append(objectCopy.Ranges, models.Range{Start: 0, End: objectEnd})
+    }
+    blobFinder := ranges.NewBlobRangeFinder(&[]helperModels.GetObject{objectCopy})
+
+    var jobIds []string
+    for i, offset := range offsets {
+        var blobEnd int64
+        if i+1 < len(offsets) {
+            blobEnd = offsets[i+1]-1
+        } else {
+            blobEnd = objectEnd
+        }
+        length := blobEnd - offset + 1
+        blobRanges := blobFinder.GetRanges(objectCopy.Name, offset, length)
+        if len(blobRanges) == 0 {
+            // This blob does not need to be retrieved
+            continue
+        }
+
+        jobId, err := helper.retrieveBlob(bucketName, getObject, blobRanges, strategy)
+        if err != nil {
+            getObject.ChannelBuilder.SetFatalError(err)
+            return nil
+        }
+        jobIds = append(jobIds, jobId)
+
+        if objectCopy.ChannelBuilder.HasFatalError() {
+            // Failed to retrieve a portion of the file, don't bother with the rest
+            break
+        }
+    }
+    return jobIds
+}
+
+func (helper *HelperImpl) retrieveBlob(bucketName string, getObject helperModels.GetObject, blobRanges []models.Range, strategy ReadTransferStrategy) (string, error) {
+    // Since there is only one blob being retrieved, create the job with Order-Guarantee=None so that the
+    // job will wait if cache needs to be reclaimed on the BP before the chunk can be allocated.
+    getObjectBlob := getObject
+    getObjectBlob.Ranges = blobRanges
+
+    strategyCopy := strategy
+    strategyCopy.Options.ChunkClientProcessingOrderGuarantee = models.JOB_CHUNK_CLIENT_PROCESSING_ORDER_GUARANTEE_NONE
+
+    return helper.GetObjects(bucketName, []helperModels.GetObject{getObjectBlob}, strategyCopy)
+}
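
To make the per-blob boundary arithmetic in retrieveIndividualFile concrete, here is a standalone sketch of just that computation; the offsets and object size are made-up values, not taken from the commit.

package main

import (
    "fmt"
    "sort"
)

func main() {
    // Hypothetical blob offsets as they might come back from HeadObject,
    // plus a made-up object size.
    offsets := []int64{200, 0, 100}
    objectSize := int64(250)

    // Offsets iterated from a map come back unordered, so sort them first.
    sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] })

    objectEnd := objectSize - 1
    for i, offset := range offsets {
        // Each blob ends one byte before the next blob's offset;
        // the last blob runs to the end of the object.
        blobEnd := objectEnd
        if i+1 < len(offsets) {
            blobEnd = offsets[i+1] - 1
        }
        length := blobEnd - offset + 1
        fmt.Printf("blob %d: bytes %d-%d (length %d)\n", i, offset, blobEnd, length)
    }
    // Output:
    // blob 0: bytes 0-99 (length 100)
    // blob 1: bytes 100-199 (length 100)
    // blob 2: bytes 200-249 (length 50)
}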