diff --git a/dkron/execution.go b/dkron/execution.go index 8f6b7642b..3a1b5befe 100644 --- a/dkron/execution.go +++ b/dkron/execution.go @@ -2,6 +2,7 @@ package dkron import ( "fmt" + "math" "strconv" "time" @@ -9,6 +10,8 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" ) +const defaultRetryInterval = 500 * time.Millisecond + // Execution type holds all of the details of a specific Execution. type Execution struct { // Id is the Key for this execution @@ -91,3 +94,13 @@ func (e *Execution) Key() string { func (e *Execution) GetGroup() string { return strconv.FormatInt(e.Group, 10) } + +func (e *Execution) CalculateExponentialBackoff() time.Duration { + now := time.Now() + if now.Before(e.StartedAt) { + return 0 + } + diff := now.Sub(e.StartedAt) + backoff := math.Log2(float64(diff/defaultRetryInterval)) + float64(e.Attempt) + return time.Duration(backoff) * defaultRetryInterval +} diff --git a/dkron/grpc.go b/dkron/grpc.go index 01329d44f..e3d9d54e6 100644 --- a/dkron/grpc.go +++ b/dkron/grpc.go @@ -217,14 +217,19 @@ func (grpcs *GRPCServer) ExecutionDone(ctx context.Context, execDoneReq *proto.E // Keep all execution properties intact except the last output execution.Output = "" + eb := execution.CalculateExponentialBackoff() grpcs.logger.WithFields(logrus.Fields{ "attempt": execution.Attempt, "execution": execution, + "backoff": eb, }).Debug("grpc: Retrying execution") + time.Sleep(eb) + if _, err := grpcs.agent.Run(job.Name, execution); err != nil { return nil, err } + return &proto.ExecutionDoneResponse{ From: grpcs.agent.config.NodeName, Payload: []byte("retry"),