MON-3934: Parallelize some readonly/non disruptive e2e tests for faster runs

Isolate specific tests to enable parallel execution

Enhance the resilience of some tests and fix those prone to errors.
machine424 committed Aug 2, 2024
1 parent 491ab3b commit a1cd4c2
Showing 21 changed files with 355 additions and 397 deletions.
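
The recurring pattern in this commit: read-only top-level tests opt in with t.Parallel(), and table-driven subtests are wrapped so each case runs in parallel as well. A minimal, self-contained sketch of the shape (test and case names are illustrative, not from the diff):

package e2e

import "testing"

func TestReadOnlyExample(t *testing.T) {
	// Safe because the test only reads cluster state.
	t.Parallel()

	for _, tc := range []struct {
		name      string
		assertion func(*testing.T)
	}{
		{name: "case A", assertion: func(t *testing.T) {}},
		{name: "case B", assertion: func(t *testing.T) {}},
	} {
		tc := tc // rebind the range variable; required before Go 1.22
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel() // parks the case until the parent returns, then runs concurrently
			tc.assertion(t)
		})
	}
}

A subtest that calls t.Parallel() is suspended until the enclosing test function returns, so on Go versions before 1.22 the tc := tc rebinding is what keeps each parallel case from observing only the last table entry.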
4 changes: 2 additions & 2 deletions pkg/client/client.go
@@ -1488,7 +1488,7 @@ func (c *Client) CreateOrUpdateConfigMap(ctx context.Context, cm *v1.ConfigMap)
return err
}

func (c *Client) DeleteIfExists(ctx context.Context, nsName string) error {
func (c *Client) DeleteNSIfExists(ctx context.Context, nsName string) error {
nClient := c.kclient.CoreV1().Namespaces()
_, err := nClient.Get(ctx, nsName, metav1.GetOptions{})
if apierrors.IsNotFound(err) {
@@ -1502,7 +1502,7 @@ func (c *Client) DeleteIfExists(ctx context.Context, nsName string) error {

err = nClient.Delete(ctx, nsName, metav1.DeleteOptions{})
if err != nil {
return fmt.Errorf("deleting ConfigMap object failed: %w", err)
return fmt.Errorf("deleting Namespace object failed: %w", err)
}
return nil
}
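
For reference, the renamed helper follows the usual check-then-delete idiom: treat NotFound as success so that teardown stays idempotent. A self-contained sketch of that shape (the client wiring is illustrative, not the repo's):

package e2e

import (
	"context"
	"fmt"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
)

// deleteNSIfExists returns nil when the namespace is already absent,
// so it is safe to call repeatedly from teardown hooks.
func deleteNSIfExists(ctx context.Context, nsClient corev1client.NamespaceInterface, name string) error {
	_, err := nsClient.Get(ctx, name, metav1.GetOptions{})
	if apierrors.IsNotFound(err) {
		return nil // already gone, nothing to do
	}
	if err != nil {
		return err
	}
	if err := nsClient.Delete(ctx, name, metav1.DeleteOptions{}); err != nil {
		return fmt.Errorf("deleting Namespace object failed: %w", err)
	}
	return nil
}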
2 changes: 2 additions & 0 deletions test/e2e/alert_relabel_config_test.go
@@ -40,6 +40,8 @@ const (
)

func TestAlertRelabelConfig(t *testing.T) {
// The test shouldn't be disruptive, safe to run in parallel with others.
t.Parallel()
initialRelabelConfig := prometheusRelabelConfig(t)

// By default, we drop prometheus_replica label + add openshift_io_alert_source = 2
3 changes: 3 additions & 0 deletions test/e2e/alerting_rule_test.go
@@ -39,6 +39,8 @@ const (
)

func TestAlertingRule(t *testing.T) {
// The test shouldn't be disruptive, safe to run in parallel with others.
t.Parallel()
ctx := context.Background()
alertingRules := f.OpenShiftMonitoringClient.MonitoringV1().AlertingRules(f.Ns)

@@ -167,6 +169,7 @@ func prometheusRuleCount(t *testing.T) int {
}

func assertPrometheusRuleCount(t *testing.T, count int) {
t.Helper()
currentCount := prometheusRuleCount(t)
if currentCount != count {
t.Fatalf("Different generated PrometheusRule count (%d != %d)", currentCount, count)
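
The t.Helper() added above changes where a failure is attributed: a Fatalf inside a marked helper is reported at the caller's line instead of inside the helper. A minimal illustration with hypothetical names:

package e2e

import "testing"

func assertCount(t *testing.T, got, want int) {
	t.Helper() // failures below point at the call site, not this function
	if got != want {
		t.Fatalf("Different generated PrometheusRule count (%d != %d)", got, want)
	}
}

func TestHelperExample(t *testing.T) {
	assertCount(t, 2, 2) // a mismatch would be reported on this line
}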
2 changes: 1 addition & 1 deletion test/e2e/alertmanager_helpers.go
@@ -183,7 +183,7 @@ func (wr *webhookReceiver) getAlertsByID(id string) ([]alert, error) {
func (wr *webhookReceiver) tearDown(t *testing.T, f *framework.Framework) {
t.Helper()
err := framework.Poll(time.Second, 5*time.Minute, func() error {
return f.OperatorClient.DeleteIfExists(ctx, wr.namespace)
return f.OperatorClient.DeleteNSIfExists(ctx, wr.namespace)
})

if err != nil {
54 changes: 7 additions & 47 deletions test/e2e/alertmanager_test.go
@@ -431,7 +431,10 @@ func TestAlertmanagerDataReplication(t *testing.T) {
),
},
} {
t.Run(test.name, test.assertion)
t.Run(test.name, func(t *testing.T) {
t.Parallel()
test.assertion(t)
})
}

// Ensure that the silence has been preserved.
@@ -465,6 +468,8 @@ func TestAlertmanagerDataReplication(t *testing.T)

// The Alertmanager API should be protected by authentication/authorization.
func TestAlertmanagerAPI(t *testing.T) {
// The test shouldn't be disruptive, safe to run in parallel with others.
t.Parallel()
err := framework.Poll(5*time.Second, 5*time.Minute, func() error {
body, err := f.AlertmanagerClient.GetAlertmanagerAlerts(
"filter", `alertname="Watchdog"`,
@@ -747,51 +752,6 @@ func TestAlertmanagerDisabling(t *testing.T) {
})
}

func TestAlertManagerHasAdditionalAlertRelabelConfigs(t *testing.T) {
const (
expectPlatformLabel = "openshift_io_alert_source"
expectPlatformLabelValue = "platform"
)

type Alerts []struct {
Labels map[string]string `json:"labels"`
}

var alerts Alerts

err := framework.Poll(5*time.Second, time.Minute, func() error {
resp, err := f.AlertmanagerClient.Do("GET", "/api/v2/alerts", nil)
if err != nil {
return err
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
return fmt.Errorf("expecting 200 status code, got %d (%q)", resp.StatusCode, resp.Body)
}

if err := json.NewDecoder(resp.Body).Decode(&alerts); err != nil {
return fmt.Errorf("error decoding alert response")
}

return nil
})
if err != nil {
t.Fatal(err)
}

for _, alert := range alerts {
v, found := alert.Labels[expectPlatformLabel]
if !found {
t.Fatal("expected correct label to be present")
}

if v != expectPlatformLabelValue {
t.Fatalf("expected correct value for %s but got %s", expectPlatformLabel, v)
}
}
}

// TestAlertmanagerConfigPipeline ensures that the AlertManagerConfig CR's
// created in a user namespace can be reconciled and have alerts sent to the
// correct Alertmanager (depending on whether user-defined Alertmanager is
@@ -907,7 +867,7 @@ func testAlertmanagerConfigPipeline(t *testing.T, wr *webhookReceiver, am *monit
t.Fatal(err)
}

if err := createUWMTestNsIfNotExist(t, f); err != nil {
if err := createUWMTestNsIfNotExist(t, f, userWorkloadTestNs); err != nil {
t.Fatal(err)
}

31 changes: 10 additions & 21 deletions test/e2e/alertmanager_user_workload_test.go
@@ -38,17 +38,7 @@ func TestUserWorkloadAlertmanager(t *testing.T) {
f.AssertStatefulSetExistsAndRollout("alertmanager-user-workload", f.UserWorkloadMonitoringNs)(t)
f.AssertServiceExists("alertmanager-user-workload", f.UserWorkloadMonitoringNs)(t)

for _, scenario := range []struct {
name string
f func(*testing.T)
}{
{
name: "assert UWM alert access",
f: assertUWMAlertsAccess,
},
} {
t.Run(scenario.name, scenario.f)
}
t.Run("assert UWM alert access", assertUWMAlertsAccess)
}

// assertUWMAlertsAccess ensures that a user can't access all alerts from the UWM alertmanager via the api.
@@ -79,17 +69,16 @@ func assertUWMAlertsAccess(t *testing.T) {
t.Fatal(err)
}

// The uwm alerts port (9095) is only exposed in-cluster, so we need to use
// port forwarding to access kube-rbac-proxy.
host, cleanUp, err := f.ForwardPort(t, f.UserWorkloadMonitoringNs, "alertmanager-user-workload", 9095)
if err != nil {
t.Fatal(err)
}
defer cleanUp()

client := framework.NewPrometheusClient(host, token)

err = framework.Poll(5*time.Second, time.Minute, func() error {
// The uwm alerts port (9095) is only exposed in-cluster, so we need to use
// port forwarding to access kube-rbac-proxy.
host, cleanUp, err := f.ForwardPort(t, f.UserWorkloadMonitoringNs, "alertmanager-user-workload", 9095)
if err != nil {
t.Fatal(err)
}
defer cleanUp()

client := framework.NewPrometheusClient(host, token)
resp, err := client.Do("GET", "/api/v2/alerts", nil)
if err != nil {
return err
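
The reshuffled block above moves the port-forward setup inside the poll callback, so each retry dials a fresh tunnel instead of reusing one that may have died between attempts. A generic sketch of that retry shape (pollWithFreshConn, dial, and check are illustrative stand-ins, not framework APIs):

package e2e

import "time"

// pollWithFreshConn retries check, re-establishing the connection on every
// attempt so a dead tunnel from one try cannot poison the next.
func pollWithFreshConn(interval, timeout time.Duration, dial func() (host string, cleanup func(), err error), check func(host string) error) error {
	deadline := time.Now().Add(timeout)
	for {
		err := func() error {
			host, cleanup, err := dial() // fresh connection per attempt
			if err != nil {
				return err
			}
			defer cleanup() // torn down before the next attempt
			return check(host)
		}()
		if err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return err
		}
		time.Sleep(interval)
	}
}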
25 changes: 20 additions & 5 deletions test/e2e/config_test.go
@@ -292,7 +292,10 @@ func TestClusterMonitorPrometheusK8Config(t *testing.T) {
assertion: f.AssertPrometheusRuleExists(thanosRule, f.Ns),
},
} {
t.Run(tc.name, tc.assertion)
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
tc.assertion(t)
})
}
}

@@ -344,7 +347,10 @@ func TestClusterMonitorAlertManagerConfig(t *testing.T) {
),
},
} {
t.Run(tc.name, tc.assertion)
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
tc.assertion(t)
})
}
}

@@ -664,7 +670,10 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) {
assertion: assertQueryLogValueEquals(f.UserWorkloadMonitoringNs, crName, "/tmp/test.log"),
},
} {
t.Run(tc.name, tc.assertion)
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
tc.assertion(t)
})
}
}

@@ -721,7 +730,10 @@ func TestUserWorkloadMonitorThanosRulerConfig(t *testing.T) {
),
},
} {
t.Run(tc.name, tc.assertion)
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
tc.assertion(t)
})
}
}

@@ -767,7 +779,10 @@ monitoringPlugin:
),
},
} {
t.Run(tc.name, tc.assertion)
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
tc.assertion(t)
})
}
}

6 changes: 3 additions & 3 deletions test/e2e/framework/helpers.go
@@ -174,12 +174,12 @@ func (f *Framework) MustGetStatefulSet(t *testing.T, name, namespace string) *ap
return statefulSet
}

// MustGetPods return all pods from `namespace` within 5 minutes or fail
func (f *Framework) MustGetPods(t *testing.T, namespace string) *v1.PodList {
// MustListPods returns all pods matching labelSelector from `namespace` within 5 minutes or fails
func (f *Framework) MustListPods(t *testing.T, namespace string, labelSelector string) *v1.PodList {
t.Helper()
var pods *v1.PodList
err := wait.Poll(time.Second, 5*time.Minute, func() (bool, error) {
pl, err := f.KubeClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{})
pl, err := f.KubeClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector})
if err != nil {
return false, nil
}
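
Example use of the new labelSelector argument, assuming the package-level framework handle f used throughout these tests; the selector matches the call site in the image-registry test below, and an empty selector keeps the old list-everything behavior:

func TestListPodsExample(t *testing.T) {
	// Only the CMO pod(s), selected by label.
	cmoPods := f.MustListPods(t, f.Ns, "app.kubernetes.io/name=cluster-monitoring-operator")
	// An empty selector lists every pod in the namespace.
	allPods := f.MustListPods(t, f.Ns, "")
	_, _ = cmoPods, allPods
}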
101 changes: 36 additions & 65 deletions test/e2e/image_registry_test.go
@@ -2,86 +2,57 @@ package e2e

import (
"net/url"
"strings"
"testing"

v1 "k8s.io/api/core/v1"
"github.com/stretchr/testify/require"
)

// TestImageRegistryPods ensures that all the container images in the Platform monitoring
// ns come from the same registry as the CMO's image
func TestImageRegistryPods(t *testing.T) {
var pods *v1.PodList

// Get all pods in openshift-monitoring namespace.
var urlRegistry string
pods = f.MustGetPods(t, f.Ns)

// use CMO image's registry as a reference for all other containers
for _, pod := range pods.Items {
if strings.Contains(pod.Name, "cluster-monitoring-operator") {
imageUrl, err := url.Parse("stubheader://" + pod.Spec.Containers[0].Image)
if err != nil {
t.Fatalf("Fail to decode host: %v", err)
}
urlRegistry = imageUrl.Host
break
}
}
cmoImageRegistryIsUsedInNsAssert(t, f.Ns)
}

if urlRegistry == "" {
t.Fatalf("CMO pod not found")
func cmoImageRegistryIsUsedInNsAssert(t *testing.T, ns string) func(t *testing.T) {
return func(t *testing.T) {
assertCMOImageRegistryIsUsed(t, ns)
}
}

for _, pod := range pods.Items {

for _, container := range pod.Spec.Containers {

// We consider the hostname part of image URL be the image registry
imageUrl, err := url.Parse("stubheader://" + container.Image)
if err != nil {
t.Fatalf("Fail to decode host: %v", err)
}

if imageUrl.Host != urlRegistry {
t.Fatalf("Pod %s Container %s registry %s differs from CMO registry %s", pod.Name, container.Name, imageUrl.Host, urlRegistry)
}
func assertCMOImageRegistryIsUsed(t *testing.T, ns string) {
getRegistry := func(t *testing.T, image string) string {
// This first attempt is needed; otherwise, we may blindly add a second scheme,
// and the initial one will be considered the hostname.
u, err := url.ParseRequestURI(image)
if err == nil {
return u.Host
}

// Maybe no scheme, add one.
u, err = url.ParseRequestURI("stubheader://" + image)
require.NoError(t, err)
return u.Host
}

setupUserWorkloadAssetsWithTeardownHook(t, f)
uwmCM := f.BuildUserWorkloadConfigMap(t,
`prometheus:
enforcedTargetLimit: 10
volumeClaimTemplate:
spec:
resources:
requests:
storage: 2Gi
`,
)
f.MustCreateOrUpdateConfigMap(t, uwmCM)
defer f.MustDeleteConfigMap(t, uwmCM)

f.AssertStatefulSetExistsAndRollout("prometheus-user-workload", f.UserWorkloadMonitoringNs)(t)
setupUserApplication(t, f)

pods = f.MustGetPods(t, f.UserWorkloadMonitoringNs)
cmoPod := f.MustListPods(t, f.Ns, "app.kubernetes.io/name=cluster-monitoring-operator")
// Get all pods
pods := f.MustListPods(t, ns, "")
require.Greater(t, len(pods.Items), 1)

// Get CMO registry
var cmoRegistry string
for _, pod := range cmoPod.Items {
containers := pod.Spec.Containers
require.Len(t, containers, 1, "the check assumes only one container is present")
cmoRegistry = getRegistry(t, containers[0].Image)
break
}
require.NotEmpty(t, cmoRegistry)

// Check equality with the others'
for _, pod := range pods.Items {

for _, container := range pod.Spec.Containers {

// We consider the hostname part of image URL be the image registry
imageUrl, err := url.Parse("stubheader://" + container.Image)
if err != nil {
t.Fatalf("Fail to decode host: %v", err)
}

if imageUrl.Host != urlRegistry {
t.Fatalf("UWM Pod %s Container %s registry %s differs from CMO registry %s", pod.Name, container.Name, imageUrl.Host, urlRegistry)
}
require.Equal(t, cmoRegistry, getRegistry(t, container.Image))
}

}

}
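
The two-step parse above is needed because typical image references ("quay.io/ns/name:tag") carry no URL scheme, so net/url only exposes the registry as Host after a placeholder scheme is prefixed; parsing as-is first avoids stacking a second scheme onto a ref that already has one. A standalone sketch of the same logic:

package main

import (
	"fmt"
	"net/url"
)

func getRegistry(image string) (string, error) {
	// First try the ref as-is; blindly prefixing a scheme onto a ref that
	// already has one would turn the original scheme into the hostname.
	if u, err := url.ParseRequestURI(image); err == nil {
		return u.Host, nil
	}
	// No scheme (the common case for image refs), so add a placeholder.
	u, err := url.ParseRequestURI("stubheader://" + image)
	if err != nil {
		return "", err
	}
	return u.Host, nil
}

func main() {
	registry, _ := getRegistry("quay.io/openshift/cluster-monitoring-operator:latest")
	fmt.Println(registry) // quay.io
}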
4 changes: 4 additions & 0 deletions test/e2e/kube_state_metrics_test.go
@@ -32,6 +32,8 @@ import (
)

func TestKSMMetricsSuppression(t *testing.T) {
// The test is "read-only", safe to run in parallel with others.
t.Parallel()

suppressedPattern, _ := regexp.Compile("kube_.*_annotations")

@@ -74,6 +76,8 @@ func TestKSMMetricsSuppression(t *testing.T) {
}

func TestKSMCRSMetrics(t *testing.T) {
// The test shouldn't be disruptive, safe to run in parallel with others.
t.Parallel()
const timeout = 5 * time.Minute
assetsDir := "./assets"
ksmCRSMetricPrefix := "kube_customresource"