Skip to content

Commit

Permalink
feat: log verbose improvements (#3189)
Browse files Browse the repository at this point in the history
  • Loading branch information
levkohimins authored Jun 10, 2024
1 parent 7288964 commit 036c0ec
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 26 deletions.
4 changes: 2 additions & 2 deletions cli/commands/terraform/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,13 +405,13 @@ func runTerraformWithRetry(ctx context.Context, terragruntOptions *options.Terra
for i := 0; i < terragruntOptions.RetryMaxAttempts; i++ {
if out, err := shell.RunTerraformCommandWithOutput(ctx, terragruntOptions, terragruntOptions.TerraformCliArgs...); err != nil {
if out == nil || !isRetryable(terragruntOptions, out) {
terragruntOptions.Logger.Errorf("%s invocation failed in %s", terragruntOptions.TerraformImplementation, terragruntOptions.WorkingDir)
terragruntOptions.Logger.WithError(err).Errorf("%s invocation failed in %s", terragruntOptions.TerraformImplementation, terragruntOptions.WorkingDir)
return err
} else {
terragruntOptions.Logger.Infof("Encountered an error eligible for retrying. Sleeping %v before retrying.\n", terragruntOptions.RetrySleepIntervalSec)
select {
case <-ctx.Done():
return ctx.Err()
return errors.WithStackTrace(ctx.Err())
case <-time.After(terragruntOptions.RetrySleepIntervalSec):
// try again
}
Expand Down
6 changes: 5 additions & 1 deletion cli/provider_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,11 @@ func (cache *ProviderCache) TerraformCommandHook(ctx context.Context, opts *opti
}
}

caches := cache.providerService.WaitForCacheReady(cacheRequestID)
caches, err := cache.providerService.WaitForCacheReady(cacheRequestID)
if err != nil {
return nil, err
}

if err := getproviders.UpdateLockfile(ctx, opts.WorkingDir, caches); err != nil {
return nil, err
}
Expand Down
3 changes: 2 additions & 1 deletion cli/provider_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ func TestProviderCache(t *testing.T) {
assert.Regexp(t, testCase.expectedBodyReg, string(body))
}

providerService.WaitForCacheReady("")
_, err = providerService.WaitForCacheReady("")
require.NoError(t, err)

if testCase.expectedCachePath != "" {
assert.FileExists(t, filepath.Join(providerCacheDir, testCase.expectedCachePath))
Expand Down
1 change: 0 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ func checkForErrorsAndExit(err error) {
}
os.Exit(exitCode)
}

}

func printErrorWithStackTrace(err error) string {
Expand Down
2 changes: 1 addition & 1 deletion remote/remote_state_gcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ func createGCSBucketIfNecessary(ctx context.Context, gcsClient *storage.Client,
// To avoid any eventual consistency issues with creating a GCS bucket we use a retry loop.
description := fmt.Sprintf("Create GCS bucket %s", config.remoteStateConfigGCS.Bucket)

return util.DoWithRetry(ctx, description, gcpMaxRetries, gcpSleepBetweenRetries, logrus.DebugLevel, func() error {
return util.DoWithRetry(ctx, description, gcpMaxRetries, gcpSleepBetweenRetries, logrus.DebugLevel, func(ctx context.Context) error {
return CreateGCSBucketWithVersioning(gcsClient, config, terragruntOptions)
})
}
Expand Down
2 changes: 1 addition & 1 deletion remote/remote_state_s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ func createS3BucketIfNecessary(ctx context.Context, s3Client *s3.S3, config *Ext
// been performed should be a no-op.
description := fmt.Sprintf("Create S3 bucket with retry %s", config.remoteStateConfigS3.Bucket)

return util.DoWithRetry(ctx, description, s3MaxRetries, s3SleepBetweenRetries, logrus.DebugLevel, func() error {
return util.DoWithRetry(ctx, description, s3MaxRetries, s3SleepBetweenRetries, logrus.DebugLevel, func(ctx context.Context) error {
err := CreateS3BucketWithVersioningSSEncryptionAndAccessLogging(s3Client, config, terragruntOptions)
if err != nil {
if isBucketCreationErrorRetriable(err) {
Expand Down
28 changes: 18 additions & 10 deletions terraform/cache/services/provider_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"context"
"crypto/sha256"
"encoding/hex"
goerrors "errors"
"fmt"
"os"
"path"
Expand Down Expand Up @@ -79,6 +78,7 @@ type ProviderCache struct {
signature []byte
archiveCached bool
ready bool
err error

userProviderDir string
packageDir string
Expand Down Expand Up @@ -202,7 +202,7 @@ func (cache *ProviderCache) warmUp(ctx context.Context) error {
if util.FileExists(cache.DownloadURL) {
cache.archivePath = cache.DownloadURL
} else {
if err := util.DoWithRetry(ctx, fmt.Sprintf("Fetching provider %s", cache.Provider), maxRetriesFetchFile, retryDelayFetchFile, logrus.DebugLevel, func() error {
if err := util.DoWithRetry(ctx, fmt.Sprintf("Fetching provider %s", cache.Provider), maxRetriesFetchFile, retryDelayFetchFile, logrus.DebugLevel, func(ctx context.Context) error {
return util.FetchToFile(ctx, cache.DownloadURL, cache.archivePath)
}); err != nil {
return err
Expand Down Expand Up @@ -242,7 +242,7 @@ func (cache *ProviderCache) acquireLockFile(ctx context.Context) (*util.Lockfile
return nil, errors.WithStackTrace(err)
}

if err := util.DoWithRetry(ctx, fmt.Sprintf("Acquiring lock file %s", cache.lockfilePath), maxRetriesLockFile, retryDelayLockFile, logrus.DebugLevel, func() error {
if err := util.DoWithRetry(ctx, fmt.Sprintf("Acquiring lock file %s", cache.lockfilePath), maxRetriesLockFile, retryDelayLockFile, logrus.DebugLevel, func(ctx context.Context) error {
return lockfile.TryLock()
}); err != nil {
return nil, errors.Errorf("unable to acquire lock file %s (already locked?) try to remove the file manually: %w", cache.lockfilePath, err)
Expand Down Expand Up @@ -278,16 +278,24 @@ func NewProviderService(cacheDir, userCacheDir string) *ProviderService {

// WaitForCacheReady returns cached providers that were requested by `terraform init` from the cache server, with an URL containing the given `requestID` value.
// The function returns the value only when all cache requests have been processed.
func (service *ProviderService) WaitForCacheReady(requestID string) []getproviders.Provider {
func (service *ProviderService) WaitForCacheReady(requestID string) ([]getproviders.Provider, error) {
service.cacheReadyMu.Lock()
defer service.cacheReadyMu.Unlock()

var providers []getproviders.Provider
var (
providers []getproviders.Provider
merr = &multierror.Error{}
)

for _, provider := range service.providerCaches.FindByRequestID(requestID) {
providers = append(providers, provider)
if provider.err != nil {
merr = multierror.Append(merr, fmt.Errorf("unable to cache provider: %s, err: %w", provider, provider.err))
}
if provider.ready {
providers = append(providers, provider)
}
}
return providers
return providers, merr.ErrorOrNil()
}

// CacheProvider starts caching the given provider using non-blocking approach.
Expand Down Expand Up @@ -365,7 +373,7 @@ func (service *ProviderService) Run(ctx context.Context) error {
return nil
})
case <-ctx.Done():
if err := errGroup.Wait(); err != nil && !goerrors.Is(err, context.Canceled) {
if err := errGroup.Wait(); err != nil {
merr = multierror.Append(merr, err)
}

Expand All @@ -391,10 +399,10 @@ func (service *ProviderService) startProviderCaching(ctx context.Context, cache
}
defer lockfile.Unlock() //nolint:errcheck

if err := cache.warmUp(ctx); err != nil {
if cache.err = cache.warmUp(ctx); cache.err != nil {
os.Remove(cache.packageDir) //nolint:errcheck
os.Remove(cache.archivePath) //nolint:errcheck
return err
return cache.err
}
cache.ready = true

Expand Down
7 changes: 5 additions & 2 deletions util/lockfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@ func (lockfile *Lockfile) Unlock() error {
if err := lockfile.Flock.Unlock(); err != nil {
return errors.WithStackTrace(err)
}
if err := os.Remove(lockfile.Path()); err != nil {
return errors.WithStackTrace(err)

if FileExists(lockfile.Path()) {
if err := os.Remove(lockfile.Path()); err != nil {
return errors.WithStackTrace(err)
}
}
return nil
}
Expand Down
16 changes: 9 additions & 7 deletions util/retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,19 @@ import (
"fmt"
"time"

"github.com/gruntwork-io/go-commons/errors"
"github.com/gruntwork-io/terragrunt/pkg/log"
"github.com/sirupsen/logrus"
)

// DoWithRetry runs the specified action. If it returns a value, return that value. If it returns an error, sleep for
// sleepBetweenRetries and try again, up to a maximum of maxRetries retries. If maxRetries is exceeded, return a
// MaxRetriesExceeded error.
func DoWithRetry(ctx context.Context, actionDescription string, maxRetries int, sleepBetweenRetries time.Duration, logLevel logrus.Level, action func() error) error {
if ctx.Err() != nil {
return ctx.Err()
}

func DoWithRetry(ctx context.Context, actionDescription string, maxRetries int, sleepBetweenRetries time.Duration, logLevel logrus.Level, action func(ctx context.Context) error) error {
for i := 0; i <= maxRetries; i++ {
log.Logf(logLevel, actionDescription)

err := action()
err := action(ctx)
if err == nil {
return nil
}
Expand All @@ -29,11 +26,16 @@ func DoWithRetry(ctx context.Context, actionDescription string, maxRetries int,
return err
}

if ctx.Err() != nil {
log.Debugf("%s returned an error: %s.", actionDescription, err.Error())
return errors.WithStackTrace(ctx.Err())
}

log.Errorf("%s returned an error: %s. Retry %d of %d. Sleeping for %s and will try again.", actionDescription, err.Error(), i, maxRetries, sleepBetweenRetries)

select {
case <-ctx.Done():
return ctx.Err()
return errors.WithStackTrace(ctx.Err())
case <-time.After(sleepBetweenRetries):
// try again
}
Expand Down

0 comments on commit 036c0ec

Please sign in to comment.