Skip to content

Commit

Permalink
fix(resources duration): handle failed pod started time set to unix e…
Browse files Browse the repository at this point in the history
…poch

We have observed that containerd sometimes fails a pod on GKE and sets the `startedAt` value to
"1970-01-01T00:00:00Z", which causes Argo Workflows to calculate an invalid resources duration.

fix #13709
  • Loading branch information
AntoineDao committed Oct 5, 2024
1 parent 68b22b8 commit 02e7d4d
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 1 deletion.
6 changes: 5 additions & 1 deletion util/resource/summary.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@ import (
wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
)

// unixEpoch returns the Unix epoch (1970-01-01T00:00:00Z) expressed in UTC.
// age compares a terminated container's StartedAt against this value.
func unixEpoch() time.Time {
	epoch := time.Unix(0, 0)
	return epoch.UTC()
}

// Summary pairs a resource list with a Kubernetes container state.
// The age method reads ContainerState.Terminated to derive a run duration.
type Summary struct {
// ResourceList holds quantities keyed by resource name (e.g. CPU, memory).
// NOTE(review): presumably the container's resource requests/limits — confirm against the caller.
ResourceList corev1.ResourceList
// ContainerState is the container's reported state. age returns 0 unless
// Terminated is set and its StartedAt is not the Unix epoch.
ContainerState corev1.ContainerState
}

func (s Summary) age() time.Duration {
if s.ContainerState.Terminated != nil {
if s.ContainerState.Terminated != nil && s.ContainerState.Terminated.StartedAt.Time != unixEpoch() {
return s.ContainerState.Terminated.FinishedAt.Time.Sub(s.ContainerState.Terminated.StartedAt.Time)
} else {
return 0
Expand Down
73 changes: 73 additions & 0 deletions util/resource/summary_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package resource

import (
"testing"
"time"

wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
"github.com/stretchr/testify/assert"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

corev1 "k8s.io/api/core/v1"
)

// TestSummaries_Duration checks that Summaries.Duration reports the expected
// per-resource durations for a single container that terminated exactly one
// hour after it started.
func TestSummaries_Duration(t *testing.T) {
	// Derive both timestamps from a single clock reading so the interval is
	// exactly one hour. Two independent time.Now() calls would add the time
	// elapsed between them to the interval and make the result timing-dependent.
	finishTime := time.Now()
	startTime := finishTime.Add(-1 * time.Hour)

	resourceList := corev1.ResourceList{
		corev1.ResourceCPU:    resource.MustParse("1"),
		corev1.ResourceMemory: resource.MustParse("1Gi"),
	}

	summaries := Summaries{
		"container1": {
			ResourceList: resourceList,
			ContainerState: corev1.ContainerState{
				Terminated: &corev1.ContainerStateTerminated{
					StartedAt:  metav1.NewTime(startTime),
					FinishedAt: metav1.NewTime(finishTime),
				},
			},
		},
	}

	// Expected durations for 1 CPU and 1Gi of memory over one hour.
	// NOTE(review): the 36864s memory figure presumably reflects a
	// per-resource scaling applied inside Duration — confirm.
	expectedDuration := wfv1.ResourcesDuration{}
	expectedDuration = expectedDuration.Add(wfv1.ResourcesDuration{
		corev1.ResourceCPU:    wfv1.NewResourceDuration(3600 * time.Second),
		corev1.ResourceMemory: wfv1.NewResourceDuration(36864 * time.Second),
	})

	assert.Equal(t, expectedDuration, summaries.Duration())
}

// TestSummaries_Duration_StartedAtIsEpoch checks that a terminated container
// whose StartedAt is "1970-01-01T00:00:00Z" contributes zero duration for
// every resource instead of a span measured from the Unix epoch.
func TestSummaries_Duration_StartedAtIsEpoch(t *testing.T) {
	// Parse the literal timestamp named in the commit message rather than
	// constructing the epoch directly.
	epochStart, parseErr := time.Parse(time.RFC3339, "1970-01-01T00:00:00Z")
	assert.NoError(t, parseErr)

	terminated := &corev1.ContainerStateTerminated{
		StartedAt:  metav1.NewTime(epochStart),
		FinishedAt: metav1.NewTime(time.Now()),
	}

	resources := corev1.ResourceList{
		corev1.ResourceCPU:    resource.MustParse("1"),
		corev1.ResourceMemory: resource.MustParse("1Gi"),
	}

	summaries := Summaries{
		"container1": {
			ResourceList:   resources,
			ContainerState: corev1.ContainerState{Terminated: terminated},
		},
	}

	// Both resources are expected to report a zero duration.
	want := wfv1.ResourcesDuration{}
	want = want.Add(wfv1.ResourcesDuration{
		corev1.ResourceCPU:    wfv1.NewResourceDuration(0 * time.Second),
		corev1.ResourceMemory: wfv1.NewResourceDuration(0 * time.Second),
	})

	assert.Equal(t, want, summaries.Duration())
}

0 comments on commit 02e7d4d

Please sign in to comment.